chiark / gitweb /
6b09588653aef388ff0c7291b8392908482a5c22
[secnet.git] / site.c
1 /* site.c - manage communication with a remote network site */
2
3 /*
4  * This file is part of secnet.
5  * See README for full list of copyright holders.
6  *
7  * secnet is free software; you can redistribute it and/or modify it
8  * under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3 of the License, or
10  * (at your option) any later version.
11  * 
12  * secnet is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * version 3 along with secnet; if not, see
19  * https://www.gnu.org/licenses/gpl.html.
20  */
21
22 /* The 'site' code doesn't know anything about the structure of the
23    packets it's transmitting.  In fact, under the new netlink
24    configuration scheme it doesn't need to know anything at all about
25    IP addresses, except how to contact its peer.  This means it could
26    potentially be used to tunnel other protocols too (IPv6, IPX, plain
27    old Ethernet frames) if appropriate netlink code can be written
28    (and that ought not to be too hard, eg. using the TUN/TAP device to
29    pretend to be an Ethernet interface).  */
30
31 /* At some point in the future the netlink code will be asked for
32    configuration information to go in the PING/PONG packets at the end
33    of the key exchange. */
34
35 #include "secnet.h"
36 #include <stdio.h>
37 #include <string.h>
38 #include <limits.h>
39 #include <assert.h>
40 #include <sys/socket.h>
41
42 #include <sys/mman.h>
43 #include "util.h"
44 #include "unaligned.h"
45 #include "magic.h"
46
/* Buffer length constant.  NOTE(review): the name suggests it sizes
 * the key-setup packet buffers; its use is not visible in this part
 * of the file -- confirm. */
#define SETUP_BUFFER_LEN 2048

/* Default timing parameters.  Durations are in milliseconds, as
 * marked: a key lifetime of one hour, a renegotiation gap of five
 * minutes, a 2s setup retry interval and a 20s wait time. */
#define DEFAULT_KEY_LIFETIME                  (3600*1000) /* [ms] */
#define DEFAULT_KEY_RENEGOTIATE_GAP           (5*60*1000) /* [ms] */
#define DEFAULT_SETUP_RETRIES 5
#define DEFAULT_SETUP_RETRY_INTERVAL             (2*1000) /* [ms] */
#define DEFAULT_WAIT_TIME                       (20*1000) /* [ms] */

/* Alternative defaults with a much longer key lifetime (two days),
 * more retries and shorter intervals.  NOTE(review): presumably
 * chosen when one end is mobile; the selection logic is not visible
 * here -- confirm. */
#define DEFAULT_MOBILE_KEY_LIFETIME      (2*24*3600*1000) /* [ms] */
#define DEFAULT_MOBILE_KEY_RENEGOTIATE_GAP (12*3600*1000) /* [ms] */
#define DEFAULT_MOBILE_SETUP_RETRIES 30
#define DEFAULT_MOBILE_SETUP_RETRY_INTERVAL      (1*1000) /* [ms] */
#define DEFAULT_MOBILE_WAIT_TIME                (10*1000) /* [ms] */

/* Note the different unit: seconds, not milliseconds. */
#define DEFAULT_MOBILE_PEER_EXPIRY            (2*60)      /* [s] */
62
63 /* Each site can be in one of several possible states. */
64
65 /* States:
66    SITE_STOP         - nothing is allowed to happen; tunnel is down;
67                        all session keys have been erased
68      -> SITE_RUN upon external instruction
69    SITE_RUN          - site up, maybe with valid key
70      -> SITE_RESOLVE upon outgoing packet and no valid key
71          we start name resolution for the other end of the tunnel
72      -> SITE_SENTMSG2 upon valid incoming message 1 and suitable time
73          we send an appropriate message 2
74    SITE_RESOLVE      - waiting for name resolution
75      -> SITE_SENTMSG1 upon successful resolution
76          we send an appropriate message 1
77      -> SITE_SENTMSG2 upon valid incoming message 1 (then abort resolution)
78          we abort resolution and send an appropriate message 2
79      -> SITE_WAIT on timeout or resolution failure
80    SITE_SENTMSG1
81      -> SITE_SENTMSG2 upon valid incoming message 1 from higher priority end
82      -> SITE_SENTMSG3 upon valid incoming message 2
83      -> SITE_WAIT on timeout
84    SITE_SENTMSG2
85      -> SITE_SENTMSG4 upon valid incoming message 3
86      -> SITE_WAIT on timeout
87    SITE_SENTMSG3
88      -> SITE_SENTMSG5 upon valid incoming message 4
89      -> SITE_WAIT on timeout
90    SITE_SENTMSG4
91      -> SITE_RUN upon valid incoming message 5
92      -> SITE_WAIT on timeout
93    SITE_SENTMSG5
94      -> SITE_RUN upon valid incoming message 6
95      -> SITE_WAIT on timeout
96    SITE_WAIT         - failed to establish key; do nothing for a while
97      -> SITE_RUN on timeout
98    */
99
/* Numeric codes for the states described in the comment above.
 * state_name() below returns the printable name for each code, so the
 * two must be kept in step. */
#define SITE_STOP     0
#define SITE_RUN      1
#define SITE_RESOLVE  2
#define SITE_SENTMSG1 3
#define SITE_SENTMSG2 4
#define SITE_SENTMSG3 5
#define SITE_SENTMSG4 6
#define SITE_SENTMSG5 7
#define SITE_WAIT     8
109
/* Not static, so visible to other translation units.
 * NOTE(review): presumably the largest amount of header space (in
 * bytes, four 4-byte words) this module needs in front of a packet;
 * the users of this variable are not visible here -- confirm. */
int32_t site_max_start_pad = 4*4;
111
112 static cstring_t state_name(uint32_t state)
113 {
114     switch (state) {
115     case 0: return "STOP";
116     case 1: return "RUN";
117     case 2: return "RESOLVE";
118     case 3: return "SENTMSG1";
119     case 4: return "SENTMSG2";
120     case 5: return "SENTMSG3";
121     case 6: return "SENTMSG4";
122     case 7: return "SENTMSG5";
123     case 8: return "WAIT";
124     default: return "*bad state*";
125     }
126 }
127
/* Length in bytes of each key-exchange nonce (see localN and remoteN
 * in struct site). */
#define NONCELEN 8
129
/* Log event classes.  Each is a single bit so that a set of events can
 * be held in one uint32_t mask (struct site's log_events);
 * event_log_priority() maps each class to a message priority.
 * Note that bit 0x00000200 is not assigned. */
#define LOG_UNEXPECTED    0x00000001
#define LOG_SETUP_INIT    0x00000002
#define LOG_SETUP_TIMEOUT 0x00000004
#define LOG_ACTIVATE_KEY  0x00000008
#define LOG_TIMEOUT_KEY   0x00000010
#define LOG_SEC           0x00000020
#define LOG_STATE         0x00000040
#define LOG_DROP          0x00000080
#define LOG_DUMP          0x00000100
#define LOG_ERROR         0x00000400
#define LOG_PEER_ADDRS    0x00000800

/* Names for the event classes above, plus two composite entries:
 * "default" (a fixed subset) and "all" (every bit set).  The table is
 * terminated by an entry with a NULL name.
 * NOTE(review): presumably used to parse the log-events configuration
 * setting; the consumer is not visible in this part of the file. */
static struct flagstr log_event_table[]={
    { "unexpected", LOG_UNEXPECTED },
    { "setup-init", LOG_SETUP_INIT },
    { "setup-timeout", LOG_SETUP_TIMEOUT },
    { "activate-key", LOG_ACTIVATE_KEY },
    { "timeout-key", LOG_TIMEOUT_KEY },
    { "security", LOG_SEC },
    { "state-change", LOG_STATE },
    { "packet-drop", LOG_DROP },
    { "dump-packets", LOG_DUMP },
    { "errors", LOG_ERROR },
    { "peer-addrs", LOG_PEER_ADDRS },
    { "default", LOG_SETUP_INIT|LOG_SETUP_TIMEOUT|
      LOG_ACTIVATE_KEY|LOG_TIMEOUT_KEY|LOG_SEC|LOG_ERROR },
    { "all", 0xffffffff },
    { NULL, 0 }
};
159
160
161 /***** TRANSPORT PEERS declarations *****/
162
163 /* Details of "mobile peer" semantics:
164
165    - We use the same data structure for the different configurations,
166      but manage it with different algorithms.
167    
168    - We record up to mobile_peers_max peer address/port numbers
169      ("peers") for key setup, and separately up to mobile_peers_max
170      for data transfer.
171
172    - In general, we make a new set of addrs (see below) when we start
173      a new key exchange; the key setup addrs become the data transport
174      addrs when key setup completes.
175
176    If our peer is mobile:
177
178    - We send to all recent addresses of incoming packets, plus
179      initially all configured addresses (which we also expire).
180
181    - So, we record addrs of good incoming packets, as follows:
182       1. expire any peers last seen >120s ("mobile-peer-expiry") ago
183       2. add the peer of the just received packet to the applicable list
184          (possibly evicting the oldest entries to make room)
185      NB that we do not expire peers until an incoming packet arrives.
186
187    - If the peer has a configured address or name, we record them the
188      same way, but only as a result of our own initiation of key
189      setup.  (We might evict some incoming packet addrs to make room.)
190
191    - The default number of addrs to keep is 3, or 4 if we have a
192      configured name or address.  That's space for two configured
193      addresses (one IPv6 and one IPv4), plus two received addresses.
194
195    - Outgoing packets are sent to every recorded address in the
196      applicable list.  Any unsupported[1] addresses are deleted from
197      the list right away.  (This should only happen to configured
198      addresses, of course, but there is no need to check that.)
199
200    - When we successfully complete a key setup, we merge the key setup
201      peers into the data transfer peers.
202
203    [1] An unsupported address is one for whose AF we don't have a
204      socket (perhaps because we got EAFNOSUPPORT or some such) or for
205      which sendto gives ENETUNREACH.
206
207    If neither end is mobile:
208
209    - When peer initiated the key exchange, we use the incoming packet
210      address.
211
212    - When we initiate the key exchange, we try configured addresses
213      until we get one which isn't unsupported then fixate on that.
214
215    - When we complete a key setup, we replace the data transport peers
216      with those from the key setup.
217
218    If we are mobile:
219
220    - We can't tell when local network setup changes so we can't cache
221      the unsupported addrs and completely remove the spurious calls to
222      sendto, but we can optimise things a bit by deprioritising addrs
223      which seem to be unsupported.
224
225    - Use only configured addresses.  (Except, that if our peer
226      initiated a key exchange we use the incoming packet address until
227      our name resolution completes.)
228
229    - When we send a packet, try each address in turn; if addr
230      supported, put that address to the end of the list for future
231      packets, and go onto the next address.
232
233    - When we complete a key setup, we replace the data transport peers
234      with those from the key setup.
235
236    */
237
/* One remembered address of the peer. */
typedef struct {
    struct timeval last; /* timestamp for this entry; presumably when the
                            address was last seen or used (cf. the expiry
                            rule in the notes above) -- confirm */
    struct comm_addr addr; /* the address itself */
} transport_peer;
242
/* A set of peer addresses: the first npeers entries of peers[] are in
 * use.  struct site holds two of these, one for key setup and one for
 * data traffic. */
typedef struct {
/* configuration information */
/* runtime information */
    int npeers; /* number of valid entries in peers[] */
    transport_peer peers[MAX_PEER_ADDRS];
} transport_peers;
249
/* Basic operations on transport peer address sets */
static void transport_peers_clear(struct site *st, transport_peers *peers);
static int transport_peers_valid(transport_peers *peers);
static void transport_peers_copy(struct site *st, transport_peers *dst,
                                 const transport_peers *src);

/* Record address of incoming setup packet; resp. data packet. */
static void transport_setup_msgok(struct site *st, const struct comm_addr *a);
static void transport_data_msgok(struct site *st, const struct comm_addr *a);

/* Initialise the setup addresses.  Called before we send the first
 * packet in a key exchange.  If we are the initiator, as a result of
 * resolve completing (or being determined not to be relevant) or an
 * incoming PROD; if we are the responder, as a result of the MSG1. */
static bool_t transport_compute_setupinit_peers(struct site *st,
        const struct comm_addr *configured_addrs /* 0 if none or not found */,
        int n_configured_addrs /* 0 if none or not found */,
        const struct comm_addr *incoming_packet_addr /* 0 if none */);

/* Called if we are the responder in a key setup, when the resolve
 * completes.  transport_compute_setupinit_peers will have been called
 * earlier.  If _complete is called, we are still doing the key setup
 * (and we should use the new values for both the rest of the key
 * setup and the ongoing data exchange); if _tardy is called, the key
 * setup is done (either completed or not) and only the data peers are
 * relevant */
static void transport_resolve_complete(struct site *st,
        const struct comm_addr *addrs, int naddrs);
static void transport_resolve_complete_tardy(struct site *st,
        const struct comm_addr *addrs, int naddrs);

/* Send buf to the addresses in peers.  NOTE(review): the definition
 * is not visible here; the sending policy is described in the "mobile
 * peer" notes above -- confirm details (including the meaning of
 * candebug) against the definition. */
static void transport_xmit(struct site *st, transport_peers *peers,
                           struct buffer_if *buf, bool_t candebug);
283
284  /***** END of transport peers declarations *****/
285
286
/* One session key: the keyed transform instance together with its
 * expiry time and the session id associated with the remote end.
 * struct site holds two of these (current and auxiliary_key). */
struct data_key {
    struct transform_inst_if *transform; /* null when no key is held */
    uint64_t key_timeout; /* End of life of current key */
    uint32_t remote_session_id;
};
292
/* Everything known about one remote site: static configuration, the
 * currently established session (if any), and the state of the key
 * setup exchange in progress (if any). */
struct site {
    closure_t cl;
    struct site_if ops;
/* configuration information */
    string_t localname;
    string_t remotename;
    bool_t keepalive;
    bool_t local_mobile, peer_mobile; /* Mobile client support */
    int32_t transport_peers_max;
    string_t tunname; /* localname<->remotename by default, used in logs */
    cstring_t *addresses; /* DNS name or address(es) for bootstrapping, optional */
    int remoteport; /* Port for bootstrapping, optional */
    uint32_t mtu_target;
    struct netlink_if *netlink;
    struct comm_if **comms;
    struct comm_clientinfo **commclientinfos;
    int ncomms;
    struct resolver_if *resolver;
    struct log_if *log;
    struct random_if *random;
    struct rsaprivkey_if *privkey;
    struct rsapubkey_if *pubkey;
    struct transform_if **transforms; /* ntransforms entries; process_msg2
                                         picks the first one the peer
                                         also advertises */
    int ntransforms;
    struct dh_if *dh;
    struct hash_if *hash;

    uint32_t index; /* Index of this site */
    uint32_t local_capabilities;
    int32_t setup_retries; /* How many times to send setup packets */
    int32_t setup_retry_interval; /* Initial timeout for setup packets */
    int32_t wait_timeout_mean; /* How long to wait if setup unsuccessful */
    int32_t mobile_peer_expiry; /* How long to remember 2ary addresses */
    int32_t key_lifetime; /* How long a key lasts once set up */
    int32_t key_renegotiate_time; /* If we see traffic (or a keepalive)
                                      after this time, initiate a new
                                      key exchange */

    bool_t our_name_later; /* our name > peer name */
    uint32_t log_events; /* mask of LOG_* classes that are logged */

/* runtime information */
    uint32_t state; /* one of the SITE_* codes */
    uint64_t now; /* Most recently seen time */
    bool_t allow_send_prod;
    bool_t msg1_crossed_logged;
    int resolving_count;
    int resolving_n_results_all;
    int resolving_n_results_stored;
    struct comm_addr resolving_results[MAX_PEER_ADDRS];

    /* The currently established session */
    struct data_key current;
    struct data_key auxiliary_key;
    bool_t auxiliary_is_new;
    uint64_t renegotiate_key_time; /* When we can negotiate a new key */
    uint64_t auxiliary_renegotiate_key_time;
    transport_peers peers; /* Current address(es) of peer for data traffic */

    /* The current key setup protocol exchange.  We can only be
       involved in one of these at a time.  There's a potential for
       denial of service here (the attacker keeps sending a setup
       packet; we keep trying to continue the exchange, and have to
       timeout before we can listen for another setup packet); perhaps
       we should keep a list of 'bad' sources for setup packets. */
    uint32_t remote_capabilities;
    uint16_t remote_adv_mtu;
    struct transform_if *chosen_transform;
    uint32_t setup_session_id;
    transport_peers setup_peers;
    uint8_t localN[NONCELEN]; /* Nonces for key exchange */
    uint8_t remoteN[NONCELEN];
    struct buffer_if buffer; /* Current outgoing key exchange packet */
    struct buffer_if scratch;
    int32_t retries; /* Number of retries remaining */
    uint64_t timeout; /* Timeout for current state */
    uint8_t *dhsecret;
    uint8_t *sharedsecret; /* grown on demand by set_new_transform */
    uint32_t sharedsecretlen, sharedsecretallocd;
    struct transform_inst_if *new_transform; /* For key setup/verify */
};
374
375 static uint32_t event_log_priority(struct site *st, uint32_t event)
376 {
377     if (!(event&st->log_events))
378         return 0;
379     switch(event) {
380     case LOG_UNEXPECTED:    return M_INFO;
381     case LOG_SETUP_INIT:    return M_INFO;
382     case LOG_SETUP_TIMEOUT: return M_NOTICE;
383     case LOG_ACTIVATE_KEY:  return M_INFO;
384     case LOG_TIMEOUT_KEY:   return M_INFO;
385     case LOG_SEC:           return M_SECURITY;
386     case LOG_STATE:         return M_DEBUG;
387     case LOG_DROP:          return M_DEBUG;
388     case LOG_DUMP:          return M_DEBUG;
389     case LOG_ERROR:         return M_ERR;
390     case LOG_PEER_ADDRS:    return M_DEBUG;
391     default:                return M_ERR;
392     }
393 }
394
395 static void vslog(struct site *st, uint32_t event, cstring_t msg, va_list ap)
396 FORMAT(printf,3,0);
397 static void vslog(struct site *st, uint32_t event, cstring_t msg, va_list ap)
398 {
399     uint32_t class;
400
401     class=event_log_priority(st, event);
402     if (class) {
403         slilog_part(st->log,class,"%s: ",st->tunname);
404         vslilog_part(st->log,class,msg,ap);
405         slilog_part(st->log,class,"\n");
406     }
407 }
408
409 static void slog(struct site *st, uint32_t event, cstring_t msg, ...)
410 FORMAT(printf,3,4);
411 static void slog(struct site *st, uint32_t event, cstring_t msg, ...)
412 {
413     va_list ap;
414     va_start(ap,msg);
415     vslog(st,event,msg,ap);
416     va_end(ap);
417 }
418
419 static void logtimeout(struct site *st, const char *fmt, ...)
420 FORMAT(printf,2,3);
421 static void logtimeout(struct site *st, const char *fmt, ...)
422 {
423     uint32_t class=event_log_priority(st,LOG_SETUP_TIMEOUT);
424     if (!class)
425         return;
426
427     va_list ap;
428     va_start(ap,fmt);
429
430     slilog_part(st->log,class,"%s: ",st->tunname);
431     vslilog_part(st->log,class,fmt,ap);
432
433     const char *delim;
434     int i;
435     for (i=0, delim=" (tried ";
436          i<st->setup_peers.npeers;
437          i++, delim=", ") {
438         transport_peer *peer=&st->setup_peers.peers[i];
439         const char *s=comm_addr_to_string(&peer->addr);
440         slilog_part(st->log,class,"%s%s",delim,s);
441     }
442
443     slilog_part(st->log,class,")\n");
444     va_end(ap);
445 }
446
/* Forward declarations of functions defined later in this file. */
static void set_link_quality(struct site *st);
static void delete_keys(struct site *st, cstring_t reason, uint32_t loglevel);
static void delete_one_key(struct site *st, struct data_key *key,
                           const char *reason /* may be 0 meaning don't log*/,
                           const char *which /* ignored if !reason */,
                           uint32_t loglevel /* ignored if !reason */);
static bool_t initiate_key_setup(struct site *st, cstring_t reason,
                                 const struct comm_addr *prod_hint);
static void enter_state_run(struct site *st);
static bool_t enter_state_resolve(struct site *st);
static void decrement_resolving_count(struct site *st, int by);
static bool_t enter_new_state(struct site *st,uint32_t next);
static void enter_state_wait(struct site *st);
static void activate_new_key(struct site *st);
461
462 static bool_t is_transform_valid(struct transform_inst_if *transform)
463 {
464     return transform && transform->valid(transform->st);
465 }
466
467 static bool_t current_valid(struct site *st)
468 {
469     return is_transform_valid(st->current.transform);
470 }
471
/* Generates call_transform_<fwdrev>(), a guarded wrapper around the
 * transform method of the same name.  If the transform is missing or
 * not valid the wrapper sets *errmsg to "transform not set up" and
 * returns transform_apply_err without calling the method; otherwise it
 * returns whatever the method returns.  The st argument of the
 * generated function is not used in its body. */
#define DEFINE_CALL_TRANSFORM(fwdrev)                                   \
static transform_apply_return                                           \
call_transform_##fwdrev(struct site *st,                                \
                                   struct transform_inst_if *transform, \
                                   struct buffer_if *buf,               \
                                   const char **errmsg)                 \
{                                                                       \
    if (!is_transform_valid(transform)) {                               \
        *errmsg="transform not set up";                                 \
        return transform_apply_err;                                     \
    }                                                                   \
    return transform->fwdrev(transform->st,buf,errmsg);                 \
}

/* Instantiate call_transform_forwards() and call_transform_reverse(). */
DEFINE_CALL_TRANSFORM(forwards)
DEFINE_CALL_TRANSFORM(reverse)
488
489 static void dispose_transform(struct transform_inst_if **transform_var)
490 {
491     struct transform_inst_if *transform=*transform_var;
492     if (transform) {
493         transform->delkey(transform->st);
494         transform->destroy(transform->st);
495     }
496     *transform_var = 0;
497 }    
498
/* Parsing guards for incoming messages.  Each of these makes the
 * ENCLOSING FUNCTION `return False' when the check fails, so they may
 * only be used inside functions returning bool_t.
 *
 *  CHECK_AVAIL(b,l)  - fail unless buffer b holds at least l bytes
 *  CHECK_EMPTY(b)    - fail unless buffer b has been fully consumed
 *  CHECK_TYPE(b,t)   - consume a 32-bit word from the front of b and
 *                      fail unless it equals t (also fails if fewer
 *                      than 4 bytes are available)
 */
#define CHECK_AVAIL(b,l) do { if ((b)->size<(l)) return False; } while(0)
#define CHECK_EMPTY(b) do { if ((b)->size!=0) return False; } while(0)
#define CHECK_TYPE(b,t) do { uint32_t type; \
    CHECK_AVAIL((b),4); \
    type=buf_unprepend_uint32((b)); \
    if (type!=(t)) return False; } while(0)
505
506 static _Bool type_is_msg34(uint32_t type)
507 {
508     return
509         type == LABEL_MSG3 ||
510         type == LABEL_MSG3BIS ||
511         type == LABEL_MSG4;
512 }
513
/* A site name as extracted from a message by unpick_name().  The name
 * is not nul-terminated and points into the message buffer. */
struct parsedname {
    int32_t len; /* length of the name proper (up to, not including,
                    the first nul byte if there is one) */
    uint8_t *name;
    struct buffer_if extrainfo; /* read-only view of the bytes following
                                   the nul; empty view if there was none */
};
519
/* Fields of a key-setup message as parsed by unpick_msg().  Pointer
 * members point into the message buffer.  Which members are filled in
 * depends on the message type: unpick_msg() returns as soon as it has
 * consumed the fields that type carries. */
struct msg {
    uint8_t *hashstart; /* start of the message; beginning of signed data */
    uint32_t dest;
    uint32_t source;
    struct parsedname remote; /* first name in the message */
    struct parsedname local; /* second name in the message */
    uint32_t remote_capabilities; /* 0 if the sender supplied none */
    uint16_t remote_mtu; /* 0 if the sender supplied none */
    int capab_transformnum; /* -1 if the message carries no pk/signature
                               part (unpick_msg returned early) */
    uint8_t *nR; /* first nonce in the message (NONCELEN bytes) */
    uint8_t *nL; /* second nonce in the message (NONCELEN bytes) */
    int32_t pklen;
    char *pk;
    int32_t hashlen; /* number of bytes from hashstart covered by the hash */
    int32_t siglen;
    char *sig;
};
537
538 static int32_t wait_timeout(struct site *st) {
539     return st->wait_timeout_mean;
540 }
541
542 static _Bool set_new_transform(struct site *st, char *pk)
543 {
544     _Bool ok;
545
546     /* Make room for the shared key */
547     st->sharedsecretlen=st->chosen_transform->keylen?:st->dh->ceil_len;
548     assert(st->sharedsecretlen);
549     if (st->sharedsecretlen > st->sharedsecretallocd) {
550         st->sharedsecretallocd=st->sharedsecretlen;
551         st->sharedsecret=safe_realloc_ary(st->sharedsecret,1,
552                                           st->sharedsecretallocd,
553                                           "site:sharedsecret");
554     }
555
556     /* Generate the shared key */
557     st->dh->makeshared(st->dh->st,st->dhsecret,st->dh->len,pk,
558                        st->sharedsecret,st->sharedsecretlen);
559
560     /* Set up the transform */
561     struct transform_if *generator=st->chosen_transform;
562     struct transform_inst_if *generated=generator->create(generator->st);
563     ok = generated->setkey(generated->st,st->sharedsecret,
564                            st->sharedsecretlen,st->our_name_later);
565
566     dispose_transform(&st->new_transform);
567     if (!ok) return False;
568     st->new_transform=generated;
569
570     slog(st,LOG_SETUP_INIT,"key exchange negotiated transform"
571          " %d (capabilities ours=%#"PRIx32" theirs=%#"PRIx32")",
572          st->chosen_transform->capab_transformnum,
573          st->local_capabilities, st->remote_capabilities);
574     return True;
575 }
576
/* Bookkeeping shared between append_string_xinfo_start() and
 * append_string_xinfo_done(): buffer offsets (relative to buf->start)
 * of the string's length field and of the byte just after the nul
 * that separates the name from its extra info. */
struct xinfoadd {
    int32_t lenpos, afternul;
};
580 static void append_string_xinfo_start(struct buffer_if *buf,
581                                       struct xinfoadd *xia,
582                                       const char *str)
583     /* Helps construct one of the names with additional info as found
584      * in MSG1..4.  Call this function first, then append all the
585      * desired extra info (not including the nul byte) to the buffer,
586      * then call append_string_xinfo_done. */
587 {
588     xia->lenpos = buf->size;
589     buf_append_string(buf,str);
590     buf_append_uint8(buf,0);
591     xia->afternul = buf->size;
592 }
593 static void append_string_xinfo_done(struct buffer_if *buf,
594                                      struct xinfoadd *xia)
595 {
596     /* we just need to adjust the string length */
597     if (buf->size == xia->afternul) {
598         /* no extra info, strip the nul too */
599         buf_unappend_uint8(buf);
600     } else {
601         put_uint16(buf->start+xia->lenpos, buf->size-(xia->lenpos+2));
602     }
603 }
604
/* Build any of msg1 to msg4. msg5 and msg6 are built from the inside
   out using a transform of config data supplied by netlink */
/* The message is assembled in st->buffer and st->retries is reset to
 * st->setup_retries.  Layout, in order:
 *   - destination session id (0 for MSG1, else st->setup_session_id)
 *   - our site index
 *   - message type
 *   - our name, optionally followed by extra info: our capabilities
 *     (always, except in a MSG1 when CAPAB_EARLY is not set) and, for
 *     MSG3/MSG3BIS/MSG4, our mtu_target
 *   - the remote site's name
 *   - our nonce                      (MSG1 ends here)
 *   - the remote nonce               (MSG2 ends here)
 *   - for MSG3BIS only, the chosen transform number
 *   - our DH public value
 *   - our signature over a hash of everything above
 * Returns True on success; returns False if hacky_par_mid_failnow()
 * says so (only reachable for message types after MSG2).
 * `what' is passed to BUF_ALLOC to label the buffer. */
static bool_t generate_msg(struct site *st, uint32_t type, cstring_t what)
{
    void *hst;
    uint8_t *hash;
    string_t dhpub, sig;

    st->retries=st->setup_retries;
    BUF_ALLOC(&st->buffer,what);
    buffer_init(&st->buffer,0);
    buf_append_uint32(&st->buffer,
        (type==LABEL_MSG1?0:st->setup_session_id));
    buf_append_uint32(&st->buffer,st->index);
    buf_append_uint32(&st->buffer,type);

    /* Our name plus optional extra info (capabilities, MTU). */
    struct xinfoadd xia;
    append_string_xinfo_start(&st->buffer,&xia,st->localname);
    if ((st->local_capabilities & CAPAB_EARLY) || (type != LABEL_MSG1)) {
        buf_append_uint32(&st->buffer,st->local_capabilities);
    }
    if (type_is_msg34(type)) {
        buf_append_uint16(&st->buffer,st->mtu_target);
    }
    append_string_xinfo_done(&st->buffer,&xia);

    buf_append_string(&st->buffer,st->remotename);
    BUF_ADD_OBJ(append,&st->buffer,st->localN);
    if (type==LABEL_MSG1) return True;
    BUF_ADD_OBJ(append,&st->buffer,st->remoteN);
    if (type==LABEL_MSG2) return True;

    if (hacky_par_mid_failnow()) return False;

    if (type==LABEL_MSG3BIS)
        buf_append_uint8(&st->buffer,st->chosen_transform->capab_transformnum);

    /* DH public value, then a signature over the hash of the whole
     * message built so far (including that public value). */
    dhpub=st->dh->makepublic(st->dh->st,st->dhsecret,st->dh->len);
    buf_append_string(&st->buffer,dhpub);
    free(dhpub);
    hash=safe_malloc(st->hash->len, "generate_msg");
    hst=st->hash->init();
    st->hash->update(hst,st->buffer.start,st->buffer.size);
    st->hash->final(hst,hash);
    sig=st->privkey->sign(st->privkey->st,hash,st->hash->len);
    buf_append_string(&st->buffer,sig);
    free(sig);
    free(hash);
    return True;
}
655
656 static bool_t unpick_name(struct buffer_if *msg, struct parsedname *nm)
657 {
658     CHECK_AVAIL(msg,2);
659     nm->len=buf_unprepend_uint16(msg);
660     CHECK_AVAIL(msg,nm->len);
661     nm->name=buf_unprepend(msg,nm->len);
662     uint8_t *nul=memchr(nm->name,0,nm->len);
663     if (!nul) {
664         buffer_readonly_view(&nm->extrainfo,0,0);
665     } else {
666         buffer_readonly_view(&nm->extrainfo, nul+1, msg->start-(nul+1));
667         nm->len=nul-nm->name;
668     }
669     return True;
670 }
671
/* Parse a key-setup message of the expected `type' from msg into *m,
 * consuming msg as it goes.  Returns False if the message is too
 * short, has trailing data, or carries a different type word.
 *
 * Parsing stops (successfully) at the point appropriate to the type:
 * after the two names for LABEL_PROD, after the first nonce for MSG1,
 * after the second nonce for MSG2; other types continue with the
 * public key and signature.  Members of *m beyond that point are not
 * written.  The st argument is not used in this function. */
static bool_t unpick_msg(struct site *st, uint32_t type,
                         struct buffer_if *msg, struct msg *m)
{
    m->capab_transformnum=-1;
    m->hashstart=msg->start;
    CHECK_AVAIL(msg,4);
    m->dest=buf_unprepend_uint32(msg);
    CHECK_AVAIL(msg,4);
    m->source=buf_unprepend_uint32(msg);
    CHECK_TYPE(msg,type);
    if (!unpick_name(msg,&m->remote)) return False;
    /* Optional extra info attached to the first name: capabilities,
     * then (for MSG3/MSG3BIS/MSG4 only) the advertised MTU.  Both
     * default to 0 when absent. */
    m->remote_capabilities=0;
    m->remote_mtu=0;
    if (m->remote.extrainfo.size) {
        CHECK_AVAIL(&m->remote.extrainfo,4);
        m->remote_capabilities=buf_unprepend_uint32(&m->remote.extrainfo);
    }
    if (type_is_msg34(type) && m->remote.extrainfo.size) {
        CHECK_AVAIL(&m->remote.extrainfo,2);
        m->remote_mtu=buf_unprepend_uint16(&m->remote.extrainfo);
    }
    if (!unpick_name(msg,&m->local)) return False;
    if (type==LABEL_PROD) {
        CHECK_EMPTY(msg);
        return True;
    }
    CHECK_AVAIL(msg,NONCELEN);
    m->nR=buf_unprepend(msg,NONCELEN);
    if (type==LABEL_MSG1) {
        CHECK_EMPTY(msg);
        return True;
    }
    CHECK_AVAIL(msg,NONCELEN);
    m->nL=buf_unprepend(msg,NONCELEN);
    if (type==LABEL_MSG2) {
        CHECK_EMPTY(msg);
        return True;
    }
    /* Only MSG3BIS carries an explicit transform number; for the
     * other types the ancient transform is implied. */
    if (type==LABEL_MSG3BIS) {
        CHECK_AVAIL(msg,1);
        m->capab_transformnum = buf_unprepend_uint8(msg);
    } else {
        m->capab_transformnum = CAPAB_TRANSFORMNUM_ANCIENT;
    }
    CHECK_AVAIL(msg,2);
    m->pklen=buf_unprepend_uint16(msg);
    CHECK_AVAIL(msg,m->pklen);
    m->pk=buf_unprepend(msg,m->pklen);
    /* The signature covers everything from the start of the message
     * up to and including the public key. */
    m->hashlen=msg->start-m->hashstart;
    CHECK_AVAIL(msg,2);
    m->siglen=buf_unprepend_uint16(msg);
    CHECK_AVAIL(msg,m->siglen);
    m->sig=buf_unprepend(msg,m->siglen);
    CHECK_EMPTY(msg);

    /* In `process_msg3_msg4' below, we assume that we can write a nul
     * terminator following the signature.  Make sure there's enough space.
     */
    if (msg->start >= msg->base + msg->alloclen)
        return False;

    return True;
}
735
736 static bool_t name_matches(const struct parsedname *nm, const char *expected)
737 {
738     int expected_len=strlen(expected);
739     return
740         nm->len == expected_len &&
741         !memcmp(nm->name, expected, expected_len);
742 }    
743
744 static bool_t check_msg(struct site *st, uint32_t type, struct msg *m,
745                         cstring_t *error)
746 {
747     if (type==LABEL_MSG1) return True;
748
749     /* Check that the site names and our nonce have been sent
750        back correctly, and then store our peer's nonce. */ 
751     if (!name_matches(&m->remote,st->remotename)) {
752         *error="wrong remote site name";
753         return False;
754     }
755     if (!name_matches(&m->local,st->localname)) {
756         *error="wrong local site name";
757         return False;
758     }
759     if (memcmp(m->nL,st->localN,NONCELEN)!=0) {
760         *error="wrong locally-generated nonce";
761         return False;
762     }
763     if (type==LABEL_MSG2) return True;
764     if (!consttime_memeq(m->nR,st->remoteN,NONCELEN)!=0) {
765         *error="wrong remotely-generated nonce";
766         return False;
767     }
768     /* MSG3 has complicated rules about capabilities, which are
769      * handled in process_msg3. */
770     if (type==LABEL_MSG3 || type==LABEL_MSG3BIS) return True;
771     if (m->remote_capabilities!=st->remote_capabilities) {
772         *error="remote capabilities changed";
773         return False;
774     }
775     if (type==LABEL_MSG4) return True;
776     *error="unknown message type";
777     return False;
778 }
779
780 static bool_t generate_msg1(struct site *st)
781 {
782     st->random->generate(st->random->st,NONCELEN,st->localN);
783     return generate_msg(st,LABEL_MSG1,"site:MSG1");
784 }
785
786 static bool_t process_msg1(struct site *st, struct buffer_if *msg1,
787                            const struct comm_addr *src, struct msg *m)
788 {
789     /* We've already determined we're in an appropriate state to
790        process an incoming MSG1, and that the MSG1 has correct values
791        of A and B. */
792
793     st->setup_session_id=m->source;
794     st->remote_capabilities=m->remote_capabilities;
795     memcpy(st->remoteN,m->nR,NONCELEN);
796     return True;
797 }
798
799 static bool_t generate_msg2(struct site *st)
800 {
801     st->random->generate(st->random->st,NONCELEN,st->localN);
802     return generate_msg(st,LABEL_MSG2,"site:MSG2");
803 }
804
805 static bool_t process_msg2(struct site *st, struct buffer_if *msg2,
806                            const struct comm_addr *src)
807 {
808     struct msg m;
809     cstring_t err;
810
811     if (!unpick_msg(st,LABEL_MSG2,msg2,&m)) return False;
812     if (!check_msg(st,LABEL_MSG2,&m,&err)) {
813         slog(st,LOG_SEC,"msg2: %s",err);
814         return False;
815     }
816     st->setup_session_id=m.source;
817     st->remote_capabilities=m.remote_capabilities;
818
819     /* Select the transform to use */
820
821     uint32_t remote_transforms = st->remote_capabilities & CAPAB_TRANSFORM_MASK;
822     if (!remote_transforms)
823         /* old secnets only had this one transform */
824         remote_transforms = 1UL << CAPAB_TRANSFORMNUM_ANCIENT;
825
826     struct transform_if *ti;
827     int i;
828     for (i=0; i<st->ntransforms; i++) {
829         ti=st->transforms[i];
830         if ((1UL << ti->capab_transformnum) & remote_transforms)
831             goto transform_found;
832     }
833     slog(st,LOG_ERROR,"no transforms in common"
834          " (us %#"PRIx32"; them: %#"PRIx32")",
835          st->local_capabilities & CAPAB_TRANSFORM_MASK,
836          remote_transforms);
837     return False;
838  transform_found:
839     st->chosen_transform=ti;
840
841     memcpy(st->remoteN,m.nR,NONCELEN);
842     return True;
843 }
844
845 static bool_t generate_msg3(struct site *st)
846 {
847     /* Now we have our nonce and their nonce. Think of a secret key,
848        and create message number 3. */
849     st->random->generate(st->random->st,st->dh->len,st->dhsecret);
850     return generate_msg(st,
851                         (st->remote_capabilities & CAPAB_TRANSFORM_MASK
852                          ? LABEL_MSG3BIS : LABEL_MSG3),
853                         "site:MSG3");
854 }
855
856 static bool_t process_msg3_msg4(struct site *st, struct msg *m)
857 {
858     uint8_t *hash;
859     void *hst;
860
861     /* Check signature and store g^x mod m */
862     hash=safe_malloc(st->hash->len, "process_msg3_msg4");
863     hst=st->hash->init();
864     st->hash->update(hst,m->hashstart,m->hashlen);
865     st->hash->final(hst,hash);
866     /* Terminate signature with a '0' - already checked that this will fit */
867     m->sig[m->siglen]=0;
868     if (!st->pubkey->check(st->pubkey->st,hash,st->hash->len,m->sig)) {
869         slog(st,LOG_SEC,"msg3/msg4 signature failed check!");
870         free(hash);
871         return False;
872     }
873     free(hash);
874
875     st->remote_adv_mtu=m->remote_mtu;
876
877     return True;
878 }
879
880 static bool_t process_msg3(struct site *st, struct buffer_if *msg3,
881                            const struct comm_addr *src, uint32_t msgtype)
882 {
883     struct msg m;
884     cstring_t err;
885
886     assert(msgtype==LABEL_MSG3 || msgtype==LABEL_MSG3BIS);
887
888     if (!unpick_msg(st,msgtype,msg3,&m)) return False;
889     if (!check_msg(st,msgtype,&m,&err)) {
890         slog(st,LOG_SEC,"msg3: %s",err);
891         return False;
892     }
893     uint32_t capab_adv_late = m.remote_capabilities
894         & ~st->remote_capabilities & CAPAB_EARLY;
895     if (capab_adv_late) {
896         slog(st,LOG_SEC,"msg3 impermissibly adds early capability flag(s)"
897              " %#"PRIx32" (was %#"PRIx32", now %#"PRIx32")",
898              capab_adv_late, st->remote_capabilities, m.remote_capabilities);
899         return False;
900     }
901     st->remote_capabilities|=m.remote_capabilities;
902
903     struct transform_if *ti;
904     int i;
905     for (i=0; i<st->ntransforms; i++) {
906         ti=st->transforms[i];
907         if (ti->capab_transformnum == m.capab_transformnum)
908             goto transform_found;
909     }
910     slog(st,LOG_SEC,"peer chose unknown-to-us transform %d!",
911          m.capab_transformnum);
912     return False;
913  transform_found:
914     st->chosen_transform=ti;
915
916     if (!process_msg3_msg4(st,&m))
917         return False;
918
919     /* Terminate their DH public key with a '0' */
920     m.pk[m.pklen]=0;
921     /* Invent our DH secret key */
922     st->random->generate(st->random->st,st->dh->len,st->dhsecret);
923
924     /* Generate the shared key and set up the transform */
925     if (!set_new_transform(st,m.pk)) return False;
926
927     return True;
928 }
929
930 static bool_t generate_msg4(struct site *st)
931 {
932     /* We have both nonces, their public key and our private key. Generate
933        our public key, sign it and send it to them. */
934     return generate_msg(st,LABEL_MSG4,"site:MSG4");
935 }
936
937 static bool_t process_msg4(struct site *st, struct buffer_if *msg4,
938                            const struct comm_addr *src)
939 {
940     struct msg m;
941     cstring_t err;
942
943     if (!unpick_msg(st,LABEL_MSG4,msg4,&m)) return False;
944     if (!check_msg(st,LABEL_MSG4,&m,&err)) {
945         slog(st,LOG_SEC,"msg4: %s",err);
946         return False;
947     }
948     
949     if (!process_msg3_msg4(st,&m))
950         return False;
951
952     /* Terminate their DH public key with a '0' */
953     m.pk[m.pklen]=0;
954
955     /* Generate the shared key and set up the transform */
956     if (!set_new_transform(st,m.pk)) return False;
957
958     return True;
959 }
960
/* The three leading 32-bit words of a packet, as extracted (in this
 * order) by unpick_msg0. */
struct msg0 {
    uint32_t dest;      /* first word of the packet */
    uint32_t source;    /* second word of the packet */
    uint32_t type;      /* third word: the message label */
};
966
967 static bool_t unpick_msg0(struct site *st, struct buffer_if *msg0,
968                           struct msg0 *m)
969 {
970     CHECK_AVAIL(msg0,4);
971     m->dest=buf_unprepend_uint32(msg0);
972     CHECK_AVAIL(msg0,4);
973     m->source=buf_unprepend_uint32(msg0);
974     CHECK_AVAIL(msg0,4);
975     m->type=buf_unprepend_uint32(msg0);
976     return True;
977     /* Leaves transformed part of buffer untouched */
978 }
979
980 static bool_t generate_msg5(struct site *st)
981 {
982     cstring_t transform_err;
983
984     BUF_ALLOC(&st->buffer,"site:MSG5");
985     /* We are going to add four words to the message */
986     buffer_init(&st->buffer,calculate_max_start_pad());
987     /* Give the netlink code an opportunity to put its own stuff in the
988        message (configuration information, etc.) */
989     buf_prepend_uint32(&st->buffer,LABEL_MSG5);
990     if (call_transform_forwards(st,st->new_transform,
991                                 &st->buffer,&transform_err))
992         return False;
993     buf_prepend_uint32(&st->buffer,LABEL_MSG5);
994     buf_prepend_uint32(&st->buffer,st->index);
995     buf_prepend_uint32(&st->buffer,st->setup_session_id);
996
997     st->retries=st->setup_retries;
998     return True;
999 }
1000
1001 static bool_t process_msg5(struct site *st, struct buffer_if *msg5,
1002                            const struct comm_addr *src,
1003                            struct transform_inst_if *transform)
1004 {
1005     struct msg0 m;
1006     cstring_t transform_err;
1007
1008     if (!unpick_msg0(st,msg5,&m)) return False;
1009
1010     if (call_transform_reverse(st,transform,msg5,&transform_err)) {
1011         /* There's a problem */
1012         slog(st,LOG_SEC,"process_msg5: transform: %s",transform_err);
1013         return False;
1014     }
1015     /* Buffer should now contain untransformed PING packet data */
1016     CHECK_AVAIL(msg5,4);
1017     if (buf_unprepend_uint32(msg5)!=LABEL_MSG5) {
1018         slog(st,LOG_SEC,"MSG5/PING packet contained wrong label");
1019         return False;
1020     }
1021     /* Older versions of secnet used to write some config data here
1022      * which we ignore.  So we don't CHECK_EMPTY */
1023     return True;
1024 }
1025
1026 static void create_msg6(struct site *st, struct transform_inst_if *transform,
1027                         uint32_t session_id)
1028 {
1029     cstring_t transform_err;
1030
1031     BUF_ALLOC(&st->buffer,"site:MSG6");
1032     /* We are going to add four words to the message */
1033     buffer_init(&st->buffer,calculate_max_start_pad());
1034     /* Give the netlink code an opportunity to put its own stuff in the
1035        message (configuration information, etc.) */
1036     buf_prepend_uint32(&st->buffer,LABEL_MSG6);
1037     transform_apply_return problem =
1038         call_transform_forwards(st,transform,
1039                                 &st->buffer,&transform_err);
1040     assert(!problem);
1041     buf_prepend_uint32(&st->buffer,LABEL_MSG6);
1042     buf_prepend_uint32(&st->buffer,st->index);
1043     buf_prepend_uint32(&st->buffer,session_id);
1044 }
1045
1046 static bool_t generate_msg6(struct site *st)
1047 {
1048     if (!is_transform_valid(st->new_transform))
1049         return False;
1050     create_msg6(st,st->new_transform,st->setup_session_id);
1051     st->retries=1; /* Peer will retransmit MSG5 if this packet gets lost */
1052     return True;
1053 }
1054
1055 static bool_t process_msg6(struct site *st, struct buffer_if *msg6,
1056                            const struct comm_addr *src)
1057 {
1058     struct msg0 m;
1059     cstring_t transform_err;
1060
1061     if (!unpick_msg0(st,msg6,&m)) return False;
1062
1063     if (call_transform_reverse(st,st->new_transform,msg6,&transform_err)) {
1064         /* There's a problem */
1065         slog(st,LOG_SEC,"process_msg6: transform: %s",transform_err);
1066         return False;
1067     }
1068     /* Buffer should now contain untransformed PING packet data */
1069     CHECK_AVAIL(msg6,4);
1070     if (buf_unprepend_uint32(msg6)!=LABEL_MSG6) {
1071         slog(st,LOG_SEC,"MSG6/PONG packet contained invalid data");
1072         return False;
1073     }
1074     /* Older versions of secnet used to write some config data here
1075      * which we ignore.  So we don't CHECK_EMPTY */
1076     return True;
1077 }
1078
1079 static transform_apply_return
1080 decrypt_msg0(struct site *st, struct buffer_if *msg0,
1081                            const struct comm_addr *src)
1082 {
1083     cstring_t transform_err, auxkey_err, newkey_err="n/a";
1084     struct msg0 m;
1085     transform_apply_return problem;
1086
1087     if (!unpick_msg0(st,msg0,&m)) return False;
1088
1089     /* Keep a copy so we can try decrypting it with multiple keys */
1090     buffer_copy(&st->scratch, msg0);
1091
1092     problem = call_transform_reverse(st,st->current.transform,
1093                                      msg0,&transform_err);
1094     if (!problem) {
1095         if (!st->auxiliary_is_new)
1096             delete_one_key(st,&st->auxiliary_key,
1097                            "peer has used new key","auxiliary key",LOG_SEC);
1098         return 0;
1099     }
1100     if (transform_apply_return_badseq(problem))
1101         goto badseq;
1102
1103     buffer_copy(msg0, &st->scratch);
1104     problem = call_transform_reverse(st,st->auxiliary_key.transform,
1105                                      msg0,&auxkey_err);
1106     if (!problem) {
1107         slog(st,LOG_DROP,"processing packet which uses auxiliary key");
1108         if (st->auxiliary_is_new) {
1109             /* We previously timed out in state SENTMSG5 but it turns
1110              * out that our peer did in fact get our MSG5 and is
1111              * using the new key.  So we should switch to it too. */
1112             /* This is a bit like activate_new_key. */
1113             struct data_key t;
1114             t=st->current;
1115             st->current=st->auxiliary_key;
1116             st->auxiliary_key=t;
1117
1118             delete_one_key(st,&st->auxiliary_key,"peer has used new key",
1119                            "previous key",LOG_SEC);
1120             st->auxiliary_is_new=0;
1121             st->renegotiate_key_time=st->auxiliary_renegotiate_key_time;
1122         }
1123         return 0;
1124     }
1125     if (transform_apply_return_badseq(problem))
1126         goto badseq;
1127
1128     if (st->state==SITE_SENTMSG5) {
1129         buffer_copy(msg0, &st->scratch);
1130         problem = call_transform_reverse(st,st->new_transform,
1131                                          msg0,&newkey_err);
1132         if (!problem) {
1133             /* It looks like we didn't get the peer's MSG6 */
1134             /* This is like a cut-down enter_new_state(SITE_RUN) */
1135             slog(st,LOG_STATE,"will enter state RUN (MSG0 with new key)");
1136             BUF_FREE(&st->buffer);
1137             st->timeout=0;
1138             activate_new_key(st);
1139             return 0; /* do process the data in this packet */
1140         }
1141         if (transform_apply_return_badseq(problem))
1142             goto badseq;
1143     }
1144
1145     slog(st,LOG_SEC,"transform: %s (aux: %s, new: %s)",
1146          transform_err,auxkey_err,newkey_err);
1147     initiate_key_setup(st,"incoming message would not decrypt",0);
1148     send_nak(src,m.dest,m.source,m.type,msg0,"message would not decrypt");
1149     assert(problem);
1150     return problem;
1151
1152  badseq:
1153     slog(st,LOG_DROP,"transform: %s (bad seq.)",transform_err);
1154     assert(problem);
1155     return problem;
1156 }
1157
1158 static bool_t process_msg0(struct site *st, struct buffer_if *msg0,
1159                            const struct comm_addr *src)
1160 {
1161     uint32_t type;
1162     transform_apply_return problem;
1163
1164     problem = decrypt_msg0(st,msg0,src);
1165     if (problem==transform_apply_seqdupe) {
1166         /* We recently received another copy of this packet, maybe due
1167          * to polypath.  That's not a problem; indeed, for the
1168          * purposes of transport address management it is a success.
1169          * But we don't want to process the packet. */
1170         transport_data_msgok(st,src);
1171         return False;
1172     }
1173     if (problem)
1174         return False;
1175
1176     CHECK_AVAIL(msg0,4);
1177     type=buf_unprepend_uint32(msg0);
1178     switch(type) {
1179     case LABEL_MSG7:
1180         /* We must forget about the current session. */
1181         delete_keys(st,"request from peer",LOG_SEC);
1182         /* probably, the peer is shutting down, and this is going to fail,
1183          * but we need to be trying to bring the link up again */
1184         if (st->keepalive)
1185             initiate_key_setup(st,"peer requested key teardown",0);
1186         return True;
1187     case LABEL_MSG9:
1188         /* Deliver to netlink layer */
1189         st->netlink->deliver(st->netlink->st,msg0);
1190         transport_data_msgok(st,src);
1191         /* See whether we should start negotiating a new key */
1192         if (st->now > st->renegotiate_key_time)
1193             initiate_key_setup(st,"incoming packet in renegotiation window",0);
1194         return True;
1195     default:
1196         slog(st,LOG_SEC,"incoming encrypted message of type %08x "
1197              "(unknown)",type);
1198         break;
1199     }
1200     return False;
1201 }
1202
1203 static void dump_packet(struct site *st, struct buffer_if *buf,
1204                         const struct comm_addr *addr, bool_t incoming,
1205                         bool_t ok)
1206 {
1207     uint32_t dest=get_uint32(buf->start);
1208     uint32_t source=get_uint32(buf->start+4);
1209     uint32_t msgtype=get_uint32(buf->start+8);
1210
1211     if (st->log_events & LOG_DUMP)
1212         slilog(st->log,M_DEBUG,"%s: %s: %08x<-%08x: %08x: %s%s",
1213                st->tunname,incoming?"incoming":"outgoing",
1214                dest,source,msgtype,comm_addr_to_string(addr),
1215                ok?"":" - fail");
1216 }
1217
1218 static bool_t comm_addr_sendmsg(struct site *st,
1219                                 const struct comm_addr *dest,
1220                                 struct buffer_if *buf)
1221 {
1222     int i;
1223     struct comm_clientinfo *commclientinfo = 0;
1224
1225     for (i=0; i < st->ncomms; i++) {
1226         if (st->comms[i] == dest->comm) {
1227             commclientinfo = st->commclientinfos[i];
1228             break;
1229         }
1230     }
1231     return dest->comm->sendmsg(dest->comm->st, buf, dest, commclientinfo);
1232 }
1233
/* Status query hook.  The argument is ignored and the answer is
 * always 0. */
static uint32_t site_status(void *st)
{
    (void)st;
    return 0;
}
1238
1239 static bool_t send_msg(struct site *st)
1240 {
1241     if (st->retries>0) {
1242         transport_xmit(st, &st->setup_peers, &st->buffer, True);
1243         st->timeout=st->now+st->setup_retry_interval;
1244         st->retries--;
1245         return True;
1246     } else if (st->state==SITE_SENTMSG5) {
1247         logtimeout(st,"timed out sending MSG5, stashing new key");
1248         /* We stash the key we have produced, in case it turns out that
1249          * our peer did see our MSG5 after all and starts using it. */
1250         /* This is a bit like some of activate_new_key */
1251         struct transform_inst_if *t;
1252         t=st->auxiliary_key.transform;
1253         st->auxiliary_key.transform=st->new_transform;
1254         st->new_transform=t;
1255         dispose_transform(&st->new_transform);
1256
1257         st->auxiliary_is_new=1;
1258         st->auxiliary_key.key_timeout=st->now+st->key_lifetime;
1259         st->auxiliary_renegotiate_key_time=st->now+st->key_renegotiate_time;
1260         st->auxiliary_key.remote_session_id=st->setup_session_id;
1261
1262         enter_state_wait(st);
1263         return False;
1264     } else {
1265         logtimeout(st,"timed out sending key setup packet "
1266             "(in state %s)",state_name(st->state));
1267         enter_state_wait(st);
1268         return False;
1269     }
1270 }
1271
1272 static void site_resolve_callback(void *sst, const struct comm_addr *addrs,
1273                                   int stored_naddrs, int all_naddrs,
1274                                   const char *address, const char *failwhy)
1275 {
1276     struct site *st=sst;
1277
1278     if (!stored_naddrs) {
1279         slog(st,LOG_ERROR,"resolution of %s failed: %s",address,failwhy);
1280     } else {
1281         slog(st,LOG_PEER_ADDRS,"resolution of %s completed, %d addrs, eg: %s",
1282              address, all_naddrs, comm_addr_to_string(&addrs[0]));;
1283
1284         int space=st->transport_peers_max-st->resolving_n_results_stored;
1285         int n_tocopy=MIN(stored_naddrs,space);
1286         COPY_ARRAY(st->resolving_results + st->resolving_n_results_stored,
1287                    addrs,
1288                    n_tocopy);
1289         st->resolving_n_results_stored += n_tocopy;
1290         st->resolving_n_results_all += all_naddrs;
1291     }
1292
1293     decrement_resolving_count(st,1);
1294 }
1295
1296 static void decrement_resolving_count(struct site *st, int by)
1297 {
1298     assert(st->resolving_count>0);
1299     st->resolving_count-=by;
1300
1301     if (st->resolving_count)
1302         return;
1303
1304     /* OK, we are done with them all.  Handle combined results. */
1305
1306     const struct comm_addr *addrs=st->resolving_results;
1307     int naddrs=st->resolving_n_results_stored;
1308     assert(naddrs<=st->transport_peers_max);
1309
1310     if (naddrs) {
1311         if (naddrs != st->resolving_n_results_all) {
1312             slog(st,LOG_SETUP_INIT,"resolution of supplied addresses/names"
1313                  " yielded too many results (%d > %d), some ignored",
1314                  st->resolving_n_results_all, naddrs);
1315         }
1316         slog(st,LOG_STATE,"resolution completed, %d addrs, eg: %s",
1317              naddrs, iaddr_to_string(&addrs[0].ia));;
1318     }
1319
1320     switch (st->state) {
1321     case SITE_RESOLVE:
1322         if (transport_compute_setupinit_peers(st,addrs,naddrs,0)) {
1323             enter_new_state(st,SITE_SENTMSG1);
1324         } else {
1325             /* Can't figure out who to try to to talk to */
1326             slog(st,LOG_SETUP_INIT,
1327                  "key exchange failed: cannot find peer address");
1328             enter_state_run(st);
1329         }
1330         break;
1331     case SITE_SENTMSG1: case SITE_SENTMSG2:
1332     case SITE_SENTMSG3: case SITE_SENTMSG4:
1333     case SITE_SENTMSG5:
1334         if (naddrs) {
1335             /* We start using the address immediately for data too.
1336              * It's best to store it in st->peers now because we might
1337              * go via SENTMSG5, WAIT, and a MSG0, straight into using
1338              * the new key (without updating the data peer addrs). */
1339             transport_resolve_complete(st,addrs,naddrs);
1340         } else if (st->local_mobile) {
1341             /* We can't let this rest because we may have a peer
1342              * address which will break in the future. */
1343             slog(st,LOG_SETUP_INIT,"resolution failed: "
1344                  "abandoning key exchange");
1345             enter_state_wait(st);
1346         } else {
1347             slog(st,LOG_SETUP_INIT,"resolution failed: "
1348                  " continuing to use source address of peer's packets"
1349                  " for key exchange and ultimately data");
1350         }
1351         break;
1352     case SITE_RUN:
1353         if (naddrs) {
1354             slog(st,LOG_SETUP_INIT,"resolution completed tardily,"
1355                  " updating peer address(es)");
1356             transport_resolve_complete_tardy(st,addrs,naddrs);
1357         } else if (st->local_mobile) {
1358             /* Not very good.  We should queue (another) renegotiation
1359              * so that we can update the peer address. */
1360             st->key_renegotiate_time=st->now+wait_timeout(st);
1361         } else {
1362             slog(st,LOG_SETUP_INIT,"resolution failed: "
1363                  " continuing to use source address of peer's packets");
1364         }
1365         break;
1366     case SITE_WAIT:
1367     case SITE_STOP:
1368         /* oh well */
1369         break;
1370     }
1371 }
1372
1373 static bool_t initiate_key_setup(struct site *st, cstring_t reason,
1374                                  const struct comm_addr *prod_hint)
1375 {
1376     /* Reentrancy hazard: can call enter_new_state/enter_state_* */
1377     if (st->state!=SITE_RUN) return False;
1378     slog(st,LOG_SETUP_INIT,"initiating key exchange (%s)",reason);
1379     if (st->addresses) {
1380         slog(st,LOG_SETUP_INIT,"resolving peer address(es)");
1381         return enter_state_resolve(st);
1382     } else if (transport_compute_setupinit_peers(st,0,0,prod_hint)) {
1383         return enter_new_state(st,SITE_SENTMSG1);
1384     }
1385     slog(st,LOG_SETUP_INIT,"key exchange failed: no address for peer");
1386     return False;
1387 }
1388
1389 static void activate_new_key(struct site *st)
1390 {
1391     struct transform_inst_if *t;
1392
1393     /* We have three transform instances, which we swap between old,
1394        active and setup */
1395     t=st->auxiliary_key.transform;
1396     st->auxiliary_key.transform=st->current.transform;
1397     st->current.transform=st->new_transform;
1398     st->new_transform=t;
1399     dispose_transform(&st->new_transform);
1400
1401     st->timeout=0;
1402     st->auxiliary_is_new=0;
1403     st->auxiliary_key.key_timeout=st->current.key_timeout;
1404     st->current.key_timeout=st->now+st->key_lifetime;
1405     st->renegotiate_key_time=st->now+st->key_renegotiate_time;
1406     transport_peers_copy(st,&st->peers,&st->setup_peers);
1407     st->current.remote_session_id=st->setup_session_id;
1408
1409     /* Compute the inter-site MTU.  This is min( our_mtu, their_mtu ).
1410      * But their mtu be unspecified, in which case we just use ours. */
1411     uint32_t intersite_mtu=
1412         MIN(st->mtu_target, st->remote_adv_mtu ?: ~(uint32_t)0);
1413     st->netlink->set_mtu(st->netlink->st,intersite_mtu);
1414
1415     slog(st,LOG_ACTIVATE_KEY,"new key activated"
1416          " (mtu ours=%"PRId32" theirs=%"PRId32" intersite=%"PRId32")",
1417          st->mtu_target, st->remote_adv_mtu, intersite_mtu);
1418     enter_state_run(st);
1419 }
1420
1421 static void delete_one_key(struct site *st, struct data_key *key,
1422                            cstring_t reason, cstring_t which, uint32_t loglevel)
1423 {
1424     if (!is_transform_valid(key->transform)) return;
1425     if (reason) slog(st,loglevel,"%s deleted (%s)",which,reason);
1426     dispose_transform(&key->transform);
1427     key->key_timeout=0;
1428 }
1429
1430 static void delete_keys(struct site *st, cstring_t reason, uint32_t loglevel)
1431 {
1432     if (current_valid(st)) {
1433         slog(st,loglevel,"session closed (%s)",reason);
1434
1435         delete_one_key(st,&st->current,0,0,0);
1436         set_link_quality(st);
1437     }
1438     delete_one_key(st,&st->auxiliary_key,0,0,0);
1439 }
1440
1441 static void state_assert(struct site *st, bool_t ok)
1442 {
1443     if (!ok) fatal("site:state_assert");
1444 }
1445
1446 static void enter_state_stop(struct site *st)
1447 {
1448     st->state=SITE_STOP;
1449     st->timeout=0;
1450     delete_keys(st,"entering state STOP",LOG_TIMEOUT_KEY);
1451     dispose_transform(&st->new_transform);
1452 }
1453
1454 static void set_link_quality(struct site *st)
1455 {
1456     uint32_t quality;
1457     if (current_valid(st))
1458         quality=LINK_QUALITY_UP;
1459     else if (st->state==SITE_WAIT || st->state==SITE_STOP)
1460         quality=LINK_QUALITY_DOWN;
1461     else if (st->addresses)
1462         quality=LINK_QUALITY_DOWN_CURRENT_ADDRESS;
1463     else if (transport_peers_valid(&st->peers))
1464         quality=LINK_QUALITY_DOWN_STALE_ADDRESS;
1465     else
1466         quality=LINK_QUALITY_DOWN;
1467
1468     st->netlink->set_quality(st->netlink->st,quality);
1469 }
1470
1471 static void enter_state_run(struct site *st)
1472 {
1473     slog(st,LOG_STATE,"entering state RUN");
1474     st->state=SITE_RUN;
1475     st->timeout=0;
1476
1477     st->setup_session_id=0;
1478     transport_peers_clear(st,&st->setup_peers);
1479     FILLZERO(st->localN);
1480     FILLZERO(st->remoteN);
1481     dispose_transform(&st->new_transform);
1482     memset(st->dhsecret,0,st->dh->len);
1483     if (st->sharedsecret) memset(st->sharedsecret,0,st->sharedsecretlen);
1484     set_link_quality(st);
1485
1486     if (st->keepalive && !current_valid(st))
1487         initiate_key_setup(st, "keepalive", 0);
1488 }
1489
1490 static bool_t ensure_resolving(struct site *st)
1491 {
1492     /* Reentrancy hazard: may call site_resolve_callback and hence
1493      * enter_new_state, enter_state_* and generate_msg*. */
1494     if (st->resolving_count)
1495         return True;
1496
1497     assert(st->addresses);
1498
1499     /* resolver->request might reentrantly call site_resolve_callback
1500      * which will decrement st->resolving, so we need to increment it
1501      * twice beforehand to prevent decrement from thinking we're
1502      * finished, and decrement it ourselves.  Alternatively if
1503      * everything fails then there are no callbacks due and we simply
1504      * set it to 0 and return false.. */
1505     st->resolving_n_results_stored=0;
1506     st->resolving_n_results_all=0;
1507     st->resolving_count+=2;
1508     const char **addrp=st->addresses;
1509     const char *address;
1510     bool_t anyok=False;
1511     for (; (address=*addrp++); ) {
1512         bool_t ok = st->resolver->request(st->resolver->st,address,
1513                                           st->remoteport,st->comms[0],
1514                                           site_resolve_callback,st);
1515         if (ok)
1516             st->resolving_count++;
1517         anyok|=ok;
1518     }
1519     if (!anyok) {
1520         st->resolving_count=0;
1521         return False;
1522     }
1523     decrement_resolving_count(st,2);
1524     return True;
1525 }
1526
1527 static bool_t enter_state_resolve(struct site *st)
1528 {
1529     /* Reentrancy hazard!  See ensure_resolving. */
1530     state_assert(st,st->state==SITE_RUN);
1531     slog(st,LOG_STATE,"entering state RESOLVE");
1532     st->state=SITE_RESOLVE;
1533     return ensure_resolving(st);
1534 }
1535
1536 static bool_t enter_new_state(struct site *st, uint32_t next)
1537 {
1538     bool_t (*gen)(struct site *st);
1539     int r;
1540
1541     slog(st,LOG_STATE,"entering state %s",state_name(next));
1542     switch(next) {
1543     case SITE_SENTMSG1:
1544         state_assert(st,st->state==SITE_RUN || st->state==SITE_RESOLVE);
1545         gen=generate_msg1;
1546         st->msg1_crossed_logged = False;
1547         break;
1548     case SITE_SENTMSG2:
1549         state_assert(st,st->state==SITE_RUN || st->state==SITE_RESOLVE ||
1550                      st->state==SITE_SENTMSG1 || st->state==SITE_WAIT);
1551         gen=generate_msg2;
1552         break;
1553     case SITE_SENTMSG3:
1554         state_assert(st,st->state==SITE_SENTMSG1);
1555         BUF_FREE(&st->buffer);
1556         gen=generate_msg3;
1557         break;
1558     case SITE_SENTMSG4:
1559         state_assert(st,st->state==SITE_SENTMSG2);
1560         BUF_FREE(&st->buffer);
1561         gen=generate_msg4;
1562         break;
1563     case SITE_SENTMSG5:
1564         state_assert(st,st->state==SITE_SENTMSG3);
1565         BUF_FREE(&st->buffer);
1566         gen=generate_msg5;
1567         break;
1568     case SITE_RUN:
1569         state_assert(st,st->state==SITE_SENTMSG4);
1570         BUF_FREE(&st->buffer);
1571         gen=generate_msg6;
1572         break;
1573     default:
1574         gen=NULL;
1575         fatal("enter_new_state(%s): invalid new state",state_name(next));
1576         break;
1577     }
1578
1579     if (hacky_par_start_failnow()) return False;
1580
1581     r= gen(st) && send_msg(st);
1582
1583     hacky_par_end(&r,
1584                   st->setup_retries, st->setup_retry_interval,
1585                   send_msg, st);
1586     
1587     if (r) {
1588         st->state=next;
1589         if (next==SITE_RUN) {
1590             BUF_FREE(&st->buffer); /* Never reused */
1591             st->timeout=0; /* Never retransmit */
1592             activate_new_key(st);
1593         }
1594         return True;
1595     }
1596     slog(st,LOG_ERROR,"error entering state %s",state_name(next));
1597     st->buffer.free=False; /* Unconditionally use the buffer; it may be
1598                               in either state, and enter_state_wait() will
1599                               do a BUF_FREE() */
1600     enter_state_wait(st);
1601     return False;
1602 }
1603
1604 /* msg7 tells our peer that we're about to forget our key */
1605 static bool_t send_msg7(struct site *st, cstring_t reason)
1606 {
1607     cstring_t transform_err;
1608
1609     if (current_valid(st) && st->buffer.free
1610         && transport_peers_valid(&st->peers)) {
1611         BUF_ALLOC(&st->buffer,"site:MSG7");
1612         buffer_init(&st->buffer,calculate_max_start_pad());
1613         buf_append_uint32(&st->buffer,LABEL_MSG7);
1614         buf_append_string(&st->buffer,reason);
1615         if (call_transform_forwards(st, st->current.transform,
1616                                     &st->buffer, &transform_err))
1617             goto free_out;
1618         buf_prepend_uint32(&st->buffer,LABEL_MSG0);
1619         buf_prepend_uint32(&st->buffer,st->index);
1620         buf_prepend_uint32(&st->buffer,st->current.remote_session_id);
1621         transport_xmit(st,&st->peers,&st->buffer,True);
1622         BUF_FREE(&st->buffer);
1623     free_out:
1624         return True;
1625     }
1626     return False;
1627 }
1628
1629 /* We go into this state if our peer becomes uncommunicative. Similar to
1630    the "stop" state, we forget all session keys for a while, before
1631    re-entering the "run" state. */
1632 static void enter_state_wait(struct site *st)
1633 {
1634     slog(st,LOG_STATE,"entering state WAIT");
1635     st->timeout=st->now+wait_timeout(st);
1636     st->state=SITE_WAIT;
1637     set_link_quality(st);
1638     BUF_FREE(&st->buffer); /* will have had an outgoing packet in it */
1639     /* XXX Erase keys etc. */
1640 }
1641
1642 static void generate_prod(struct site *st, struct buffer_if *buf)
1643 {
1644     buffer_init(buf,0);
1645     buf_append_uint32(buf,0);
1646     buf_append_uint32(buf,0);
1647     buf_append_uint32(buf,LABEL_PROD);
1648     buf_append_string(buf,st->localname);
1649     buf_append_string(buf,st->remotename);
1650 }
1651
1652 static void generate_send_prod(struct site *st,
1653                                const struct comm_addr *source)
1654 {
1655     if (!st->allow_send_prod) return; /* too soon */
1656     if (!(st->state==SITE_RUN || st->state==SITE_RESOLVE ||
1657           st->state==SITE_WAIT)) return; /* we'd ignore peer's MSG1 */
1658
1659     slog(st,LOG_SETUP_INIT,"prodding peer for key exchange");
1660     st->allow_send_prod=0;
1661     generate_prod(st,&st->scratch);
1662     bool_t ok = comm_addr_sendmsg(st, source, &st->scratch);
1663     dump_packet(st,&st->scratch,source,False,ok);
1664 }
1665
1666 static inline void site_settimeout(uint64_t timeout, int *timeout_io)
1667 {
1668     if (timeout) {
1669         int64_t offset=timeout-*now;
1670         if (offset<0) offset=0;
1671         if (offset>INT_MAX) offset=INT_MAX;
1672         if (*timeout_io<0 || offset<*timeout_io)
1673             *timeout_io=offset;
1674     }
1675 }
1676
1677 static int site_beforepoll(void *sst, struct pollfd *fds, int *nfds_io,
1678                            int *timeout_io)
1679 {
1680     struct site *st=sst;
1681
1682     BEFOREPOLL_WANT_FDS(0); /* We don't use any file descriptors */
1683     st->now=*now;
1684
1685     /* Work out when our next timeout is. The earlier of 'timeout' or
1686        'current.key_timeout'. A stored value of '0' indicates no timeout
1687        active. */
1688     site_settimeout(st->timeout, timeout_io);
1689     site_settimeout(st->current.key_timeout, timeout_io);
1690     site_settimeout(st->auxiliary_key.key_timeout, timeout_io);
1691
1692     return 0; /* success */
1693 }
1694
1695 static void check_expiry(struct site *st, struct data_key *key,
1696                          const char *which)
1697 {
1698     if (key->key_timeout && *now>key->key_timeout) {
1699         delete_one_key(st,key,"maximum life exceeded",which,LOG_TIMEOUT_KEY);
1700     }
1701 }
1702
1703 /* NB site_afterpoll will be called before site_beforepoll is ever called */
1704 static void site_afterpoll(void *sst, struct pollfd *fds, int nfds)
1705 {
1706     struct site *st=sst;
1707
1708     st->now=*now;
1709     if (st->timeout && *now>st->timeout) {
1710         st->timeout=0;
1711         if (st->state>=SITE_SENTMSG1 && st->state<=SITE_SENTMSG5) {
1712             if (!hacky_par_start_failnow())
1713                 send_msg(st);
1714         } else if (st->state==SITE_WAIT) {
1715             enter_state_run(st);
1716         } else {
1717             slog(st,LOG_ERROR,"site_afterpoll: unexpected timeout, state=%d",
1718                  st->state);
1719         }
1720     }
1721     check_expiry(st,&st->current,"current key");
1722     check_expiry(st,&st->auxiliary_key,"auxiliary key");
1723 }
1724
1725 /* This function is called by the netlink device to deliver packets
1726    intended for the remote network. The packet is in "raw" wire
1727    format, but is guaranteed to be word-aligned. */
1728 static void site_outgoing(void *sst, struct buffer_if *buf)
1729 {
1730     struct site *st=sst;
1731     cstring_t transform_err;
1732     
1733     if (st->state==SITE_STOP) {
1734         BUF_FREE(buf);
1735         return;
1736     }
1737
1738     st->allow_send_prod=1;
1739
1740     /* In all other states we consider delivering the packet if we have
1741        a valid key and a valid address to send it to. */
1742     if (current_valid(st) && transport_peers_valid(&st->peers)) {
1743         /* Transform it and send it */
1744         if (buf->size>0) {
1745             buf_prepend_uint32(buf,LABEL_MSG9);
1746             if (call_transform_forwards(st, st->current.transform,
1747                                         buf, &transform_err))
1748                 goto free_out;
1749             buf_prepend_uint32(buf,LABEL_MSG0);
1750             buf_prepend_uint32(buf,st->index);
1751             buf_prepend_uint32(buf,st->current.remote_session_id);
1752             transport_xmit(st,&st->peers,buf,False);
1753         }
1754     free_out:
1755         BUF_FREE(buf);
1756         return;
1757     }
1758
1759     slog(st,LOG_DROP,"discarding outgoing packet of size %d",buf->size);
1760     BUF_FREE(buf);
1761     initiate_key_setup(st,"outgoing packet",0);
1762 }
1763
1764 static bool_t named_for_us(struct site *st, const struct buffer_if *buf_in,
1765                            uint32_t type, struct msg *m)
1766     /* For packets which are identified by the local and remote names.
1767      * If it has our name and our peer's name in it it's for us. */
1768 {
1769     struct buffer_if buf[1];
1770     buffer_readonly_clone(buf,buf_in);
1771     return unpick_msg(st,type,buf,m)
1772         && name_matches(&m->remote,st->remotename)
1773         && name_matches(&m->local,st->localname);
1774 }
1775
1776 static bool_t we_have_priority(struct site *st, const struct msg *m) {
1777     if ((st->local_capabilities & m->remote_capabilities)
1778         && CAPAB_PRIORITY_MOBILE) {
1779         if (st->local_mobile) return True;
1780         if (st-> peer_mobile) return False;
1781     }
1782     return st->our_name_later;
1783 }
1784
1785 static bool_t setup_late_msg_ok(struct site *st, 
1786                                 const struct buffer_if *buf_in,
1787                                 uint32_t msgtype,
1788                                 const struct comm_addr *source) {
1789     /* For setup packets which seem from their type like they are
1790      * late.  Maybe they came via a different path.  All we do is make
1791      * a note of the sending address, iff they look like they are part
1792      * of the current key setup attempt. */
1793     struct msg m;
1794     if (!named_for_us(st,buf_in,msgtype,&m))
1795         /* named_for_us calls unpick_msg which gets the nonces */
1796         return False;
1797     if (!consttime_memeq(m.nR,st->remoteN,NONCELEN) ||
1798         !consttime_memeq(m.nL,st->localN, NONCELEN))
1799         /* spoof ?  from stale run ?  who knows */
1800         return False;
1801     transport_setup_msgok(st,source);
1802     return True;
1803 }
1804
/* This function is called by the communication device to deliver
   packets from our peers.
   It should return True if the packet is recognised as being for
   this current site instance (and should therefore not be processed
   by other sites), even if the packet was otherwise ignored.

   The code reads a 32-bit word at offset 0 (the destination index)
   and another at offset 8 (the message type); anything shorter than
   12 bytes is rejected.  MSG1 and PROD packets are claimed by name
   (see named_for_us); every other type is claimed only when the
   destination index equals st->index.

   Every path that returns True has called BUF_FREE(buf); paths that
   return False leave buf alone. */
static bool_t site_incoming(void *sst, struct buffer_if *buf,
                            const struct comm_addr *source)
{
    struct site *st=sst;

    /* Too short to contain the three header words we look at. */
    if (buf->size < 12) return False;

    uint32_t dest=get_uint32(buf->start);
    uint32_t msgtype=get_uint32(buf->start+8);
    struct msg named_msg;

    if (msgtype==LABEL_MSG1) {
        if (!named_for_us(st,buf,msgtype,&named_msg))
            return False;
        /* It's a MSG1 addressed to us. Decide what to do about it. */
        dump_packet(st,buf,source,True,True);
        if (st->state==SITE_RUN || st->state==SITE_RESOLVE ||
            st->state==SITE_WAIT) {
            /* We should definitely process it */
            transport_compute_setupinit_peers(st,0,0,source);
            if (process_msg1(st,buf,source,&named_msg)) {
                slog(st,LOG_SETUP_INIT,"key setup initiated by peer");
                bool_t entered=enter_new_state(st,SITE_SENTMSG2);
                if (entered && st->addresses && st->local_mobile)
                    /* We must do this as the very last thing, because
                       the resolver callback might reenter us. */
                    ensure_resolving(st);
            } else {
                slog(st,LOG_ERROR,"failed to process incoming msg1");
            }
            BUF_FREE(buf);
            return True;
        } else if (st->state==SITE_SENTMSG1) {
            /* We've just sent a message 1! They may have crossed on
               the wire. If we have priority then we ignore the
               incoming one, otherwise we process it as usual. */
            if (we_have_priority(st,&named_msg)) {
                BUF_FREE(buf);
                /* Log only the first crossing; the counter is still
                   incremented on every one. */
                if (!st->msg1_crossed_logged++)
                    slog(st,LOG_SETUP_INIT,"crossed msg1s; we are higher "
                         "priority => ignore incoming msg1");
                return True;
            } else {
                slog(st,LOG_SETUP_INIT,"crossed msg1s; we are lower "
                     "priority => use incoming msg1");
                if (process_msg1(st,buf,source,&named_msg)) {
                    BUF_FREE(&st->buffer); /* Free our old message 1 */
                    transport_setup_msgok(st,source);
                    enter_new_state(st,SITE_SENTMSG2);
                } else {
                    slog(st,LOG_ERROR,"failed to process an incoming "
                         "crossed msg1 (we have low priority)");
                }
                BUF_FREE(buf);
                return True;
            }
        } else if (st->state==SITE_SENTMSG2 ||
                   st->state==SITE_SENTMSG4) {
            if (consttime_memeq(named_msg.nR,st->remoteN,NONCELEN)) {
                /* We are ahead in the protocol, but that msg1 had the
                 * peer's nonce so presumably it is from this key
                 * exchange run, via a slower route */
                transport_setup_msgok(st,source);
            } else {
                slog(st,LOG_UNEXPECTED,"competing incoming message 1");
            }
            BUF_FREE(buf);
            return True;
        }
        /* The message 1 was received at an unexpected stage of the
           key setup.  Well, they lost the race. */
        slog(st,LOG_UNEXPECTED,"unexpected incoming message 1");
        BUF_FREE(buf);
        return True;
    }
    if (msgtype==LABEL_PROD) {
        if (!named_for_us(st,buf,msgtype,&named_msg))
            return False;
        dump_packet(st,buf,source,True,True);
        /* A PROD is only acted on in state RUN with no valid current
           key; in that case we start a key setup towards the sender. */
        if (st->state!=SITE_RUN) {
            slog(st,LOG_DROP,"ignoring PROD when not in state RUN");
        } else if (current_valid(st)) {
            slog(st,LOG_DROP,"ignoring PROD when we think we have a key");
        } else {
            initiate_key_setup(st,"peer sent PROD packet",source);
        }
        BUF_FREE(buf);
        return True;
    }
    if (dest==st->index) {
        /* Explicitly addressed to us */
        /* (MSG0 packets are not dumped here.) */
        if (msgtype!=LABEL_MSG0) dump_packet(st,buf,source,True,True);
        switch (msgtype) {
        case LABEL_NAK:
            /* If the source is our current peer then initiate a key setup,
               because our peer's forgotten the key */
            /* The word at offset 4 is compared with the session id we
               hold for the peer.  If a key setup could not be
               initiated, fall back to sending a PROD. */
            if (get_uint32(buf->start+4)==st->current.remote_session_id) {
                bool_t initiated;
                initiated = initiate_key_setup(st,"received a NAK",source);
                if (!initiated) generate_send_prod(st,source);
            } else {
                slog(st,LOG_SEC,"bad incoming NAK");
            }
            break;
        case LABEL_MSG0:
            process_msg0(st,buf,source);
            break;
        case LABEL_MSG1:
            /* Setup packet: should not have been explicitly addressed
               to us */
            slog(st,LOG_SEC,"incoming explicitly addressed msg1");
            break;
        case LABEL_MSG2:
            /* Setup packet: expected only in state SENTMSG1 */
            /* A late duplicate while in SENTMSG3/SENTMSG5 is accepted
               quietly if setup_late_msg_ok recognises it. */
            if (st->state!=SITE_SENTMSG1) {
                if ((st->state==SITE_SENTMSG3 ||
                     st->state==SITE_SENTMSG5) &&
                    setup_late_msg_ok(st,buf,msgtype,source))
                    break;
                slog(st,LOG_UNEXPECTED,"unexpected MSG2");
            } else if (process_msg2(st,buf,source)) {
                transport_setup_msgok(st,source);
                enter_new_state(st,SITE_SENTMSG3);
            } else {
                slog(st,LOG_SEC,"invalid MSG2");
            }
            break;
        case LABEL_MSG3:
        case LABEL_MSG3BIS:
            /* Setup packet: expected only in state SENTMSG2 */
            /* A late duplicate while in SENTMSG4 is accepted quietly
               if setup_late_msg_ok recognises it. */
            if (st->state!=SITE_SENTMSG2) {
                if ((st->state==SITE_SENTMSG4) &&
                    setup_late_msg_ok(st,buf,msgtype,source))
                    break;
                slog(st,LOG_UNEXPECTED,"unexpected MSG3");
            } else if (process_msg3(st,buf,source,msgtype)) {
                transport_setup_msgok(st,source);
                enter_new_state(st,SITE_SENTMSG4);
            } else {
                slog(st,LOG_SEC,"invalid MSG3");
            }
            break;
        case LABEL_MSG4:
            /* Setup packet: expected only in state SENTMSG3 */
            /* A late duplicate while in SENTMSG5 is accepted quietly
               if setup_late_msg_ok recognises it. */
            if (st->state!=SITE_SENTMSG3) {
                if ((st->state==SITE_SENTMSG5) &&
                    setup_late_msg_ok(st,buf,msgtype,source))
                    break;
                slog(st,LOG_UNEXPECTED,"unexpected MSG4");
            } else if (process_msg4(st,buf,source)) {
                transport_setup_msgok(st,source);
                enter_new_state(st,SITE_SENTMSG5);
            } else {
                slog(st,LOG_SEC,"invalid MSG4");
            }
            break;
        case LABEL_MSG5:
            /* Setup packet: expected only in state SENTMSG4 */
            /* (may turn up in state RUN if our return MSG6 was lost
               and the new key has already been activated. In that
               case we discard it. The peer will realise that we
               are using the new key when they see our data packets.
               Until then the peer's data packets to us get discarded.) */
            /* In state RUN the MSG5 is checked against the current
               transform and, if valid, answered with a fresh MSG6. */
            if (st->state==SITE_SENTMSG4) {
                if (process_msg5(st,buf,source,st->new_transform)) {
                    transport_setup_msgok(st,source);
                    enter_new_state(st,SITE_RUN);
                } else {
                    slog(st,LOG_SEC,"invalid MSG5");
                }
            } else if (st->state==SITE_RUN) {
                if (process_msg5(st,buf,source,st->current.transform)) {
                    slog(st,LOG_DROP,"got MSG5, retransmitting MSG6");
                    transport_setup_msgok(st,source);
                    create_msg6(st,st->current.transform,
                                st->current.remote_session_id);
                    transport_xmit(st,&st->peers,&st->buffer,True);
                    BUF_FREE(&st->buffer);
                } else {
                    slog(st,LOG_SEC,"invalid MSG5 (in state RUN)");
                }
            } else {
                slog(st,LOG_UNEXPECTED,"unexpected MSG5");
            }
            break;
        case LABEL_MSG6:
            /* Setup packet: expected only in state SENTMSG5 */
            if (st->state!=SITE_SENTMSG5) {
                slog(st,LOG_UNEXPECTED,"unexpected MSG6");
            } else if (process_msg6(st,buf,source)) {
                BUF_FREE(&st->buffer); /* Free message 5 */
                transport_setup_msgok(st,source);
                activate_new_key(st);
            } else {
                slog(st,LOG_SEC,"invalid MSG6");
            }
            break;
        default:
            slog(st,LOG_SEC,"received message of unknown type 0x%08x",
                 msgtype);
            break;
        }
        /* Whatever happened above, the packet was ours: release it
           and claim it. */
        BUF_FREE(buf);
        return True;
    }

    /* Not named for us and not addressed to our index. */
    return False;
}
2018
2019 static void site_control(void *vst, bool_t run)
2020 {
2021     struct site *st=vst;
2022     if (run) enter_state_run(st);
2023     else enter_state_stop(st);
2024 }
2025
/* Phase hook, registered by site_apply for PHASE_SHUTDOWN.  newphase
   is not examined. */
static void site_phase_hook(void *sst, uint32_t newphase)
{
    struct site *st = sst;

    /* The program is shutting down; tell our peer.  Whether the
       message could actually be sent is deliberately ignored. */
    (void)send_msg7(st, "shutting down");
}
2033
2034 static void site_childpersist_clearkeys(void *sst, uint32_t newphase)
2035 {
2036     struct site *st=sst;
2037     dispose_transform(&st->current.transform);
2038     dispose_transform(&st->auxiliary_key.transform);
2039     dispose_transform(&st->new_transform);
2040     /* Not much point overwiting the signing key, since we loaded it
2041        from disk, and it is only valid prospectively if at all,
2042        anyway. */
2043     /* XXX it would be best to overwrite the DH state, because that
2044        _is_ relevant to forward secrecy.  However we have no
2045        convenient interface for doing that and in practice gmp has
2046        probably dribbled droppings all over the malloc arena.  A good
2047        way to fix this would be to have a privsep child for asymmetric
2048        crypto operations, but that's a task for another day. */
2049 }
2050
/* Apply function for the "site" closure (registered by site_module).
 *
 * args[0] must be a dictionary describing one remote site.  A struct
 * site is allocated and filled in from it, then registered with the
 * poll loop, the netlink device, every comm, and the shutdown and
 * child-persist phase hooks.  The site starts in the STOP state.
 *
 * Returns a new closure wrapping the site, or NULL (with the struct
 * freed) when the entry describes ourselves or when both ends are
 * configured as mobile.  Configuration errors go through cfgfatal. */
static list_t *site_apply(closure_t *self, struct cloc loc, dict_t *context,
                          list_t *args)
{
    /* Shared by all sites; pre-incremented below to give each site its
       own index. */
    static uint32_t index_sequence;
    struct site *st;
    item_t *item;
    dict_t *dict;
    int i;

    NEW(st);

    st->cl.description="site";
    st->cl.type=CL_SITE;
    st->cl.apply=NULL;
    st->cl.interface=&st->ops;
    st->ops.st=st;
    st->ops.control=site_control;
    st->ops.status=site_status;

    /* First parameter must be a dict */
    item=list_elem(args,0);
    if (!item || item->type!=t_dict)
        cfgfatal(loc,"site","parameter must be a dictionary\n");
    
    dict=item->data.dict;
    st->localname=dict_read_string(dict, "local-name", True, "site", loc);
    st->remotename=dict_read_string(dict, "name", True, "site", loc);

    st->keepalive=dict_read_bool(dict,"keepalive",False,"site",loc,False);

    st->peer_mobile=dict_read_bool(dict,"mobile",False,"site",loc,False);
    st->local_mobile=
        dict_read_bool(dict,"local-mobile",False,"site",loc,False);

    /* Sanity check (which also allows the 'sites' file to include
       site() closures for all sites including our own): refuse to
       talk to ourselves */
    if (strcmp(st->localname,st->remotename)==0) {
        Message(M_DEBUG,"site %s: local-name==name -> ignoring this site\n",
                st->localname);
        /* The entry is about us, so its "mobile" flag must agree with
           our own "local-mobile" setting. */
        if (st->peer_mobile != st->local_mobile)
            cfgfatal(loc,"site","site %s's peer-mobile=%d"
                    " but our local-mobile=%d\n",
                    st->localname, st->peer_mobile, st->local_mobile);
        free(st);
        return NULL;
    }
    if (st->peer_mobile && st->local_mobile) {
        Message(M_WARNING,"site %s: site is mobile but so are we"
                " -> ignoring this site\n", st->remotename);
        free(st);
        return NULL;
    }

    /* Allocate this site's index; the assert stops the counter from
       wrapping. */
    assert(index_sequence < 0xffffffffUL);
    st->index = ++index_sequence;
    st->local_capabilities = 0;
    st->netlink=find_cl_if(dict,"link",CL_NETLINK,True,"site",loc);

/* GET_CLOSURE_LIST(dictkey,things,nthings,CL_TYPE): look up the list
   `dictkey' in dict, require it to exist and be non-empty, and store
   the interface pointer of each closure (which must be of type
   CL_TYPE) in the freshly allocated array st->things, with the count
   in st->nthings. */
#define GET_CLOSURE_LIST(dictkey,things,nthings,CL_TYPE) do{            \
    list_t *things##_cfg=dict_lookup(dict,dictkey);                     \
    if (!things##_cfg)                                                  \
        cfgfatal(loc,"site","closure list \"%s\" not found\n",dictkey); \
    st->nthings=list_length(things##_cfg);                              \
    NEW_ARY(st->things,st->nthings);                                    \
    assert(st->nthings);                                                \
    for (i=0; i<st->nthings; i++) {                                     \
        item_t *item=list_elem(things##_cfg,i);                         \
        if (item->type!=t_closure)                                      \
            cfgfatal(loc,"site","%s is not a closure\n",dictkey);       \
        closure_t *cl=item->data.closure;                               \
        if (cl->type!=CL_TYPE)                                          \
            cfgfatal(loc,"site","%s closure wrong type\n",dictkey);     \
        st->things[i]=cl->interface;                                    \
    }                                                                   \
}while(0)

    GET_CLOSURE_LIST("comm",comms,ncomms,CL_COMM);

    /* One client-info slot per comm; left as 0 when there is no
       "comm-info" dictionary. */
    NEW_ARY(st->commclientinfos, st->ncomms);
    dict_t *comminfo = dict_read_dict(dict,"comm-info",False,"site",loc);
    for (i=0; i<st->ncomms; i++) {
        st->commclientinfos[i] =
            !comminfo ? 0 :
            st->comms[i]->clientinfo(st->comms[i],comminfo,loc);
    }

    st->resolver=find_cl_if(dict,"resolver",CL_RESOLVER,True,"site",loc);
    st->log=find_cl_if(dict,"log",CL_LOG,True,"site",loc);
    st->random=find_cl_if(dict,"random",CL_RANDOMSRC,True,"site",loc);

    st->privkey=find_cl_if(dict,"local-key",CL_RSAPRIVKEY,True,"site",loc);
    /* "port" is mandatory exactly when "address" is given. */
    st->addresses=dict_read_string_array(dict,"address",False,"site",loc,0);
    if (st->addresses)
        st->remoteport=dict_read_number(dict,"port",True,"site",loc,0);
    else st->remoteport=0;
    st->pubkey=find_cl_if(dict,"key",CL_RSAPUBKEY,True,"site",loc);

    GET_CLOSURE_LIST("transform",transforms,ntransforms,CL_TRANSFORM);

    st->dh=find_cl_if(dict,"dh",CL_DH,True,"site",loc);
    st->hash=find_cl_if(dict,"hash",CL_HASH,True,"site",loc);

/* DEFAULT(D) picks DEFAULT_MOBILE_D when either end is mobile and
   DEFAULT_D otherwise.  CFG_NUMBER reads an optional number with that
   default; note that its expansion already ends in a semicolon. */
#define DEFAULT(D) (st->peer_mobile || st->local_mobile \
                    ? DEFAULT_MOBILE_##D : DEFAULT_##D)
#define CFG_NUMBER(k,D) dict_read_number(dict,(k),False,"site",loc,DEFAULT(D));

    st->key_lifetime=         CFG_NUMBER("key-lifetime",  KEY_LIFETIME);
    st->setup_retries=        CFG_NUMBER("setup-retries", SETUP_RETRIES);
    st->setup_retry_interval= CFG_NUMBER("setup-timeout", SETUP_RETRY_INTERVAL);
    st->wait_timeout_mean=    CFG_NUMBER("wait-time",     WAIT_TIME);
    /* 0 here means "not configured"; it is replaced by the netlink MTU
       further down. */
    st->mtu_target= dict_read_number(dict,"mtu-target",False,"site",loc,0);

    st->mobile_peer_expiry= dict_read_number(
       dict,"mobile-peer-expiry",False,"site",loc,DEFAULT_MOBILE_PEER_EXPIRY);

    /* Which key limits the number of peer addresses depends on whether
       the peer is mobile; the default depends on whether any addresses
       were configured. */
    const char *peerskey= st->peer_mobile
        ? "mobile-peers-max" : "static-peers-max";
    st->transport_peers_max= dict_read_number(
        dict,peerskey,False,"site",loc, st->addresses ? 4 : 3);
    if (st->transport_peers_max<1 ||
        st->transport_peers_max>MAX_PEER_ADDRS) {
        cfgfatal(loc,"site", "%s must be in range 1.."
                 STRING(MAX_PEER_ADDRS) "\n", peerskey);
    }

    /* Default renegotiation time: the lifetime minus the default gap,
       or half the lifetime when the lifetime is shorter than twice
       that gap.  An explicit "renegotiate-time" overrides it. */
    if (st->key_lifetime < DEFAULT(KEY_RENEGOTIATE_GAP)*2)
        st->key_renegotiate_time=st->key_lifetime/2;
    else
        st->key_renegotiate_time=st->key_lifetime-DEFAULT(KEY_RENEGOTIATE_GAP);
    st->key_renegotiate_time=dict_read_number(
        dict,"renegotiate-time",False,"site",loc,st->key_renegotiate_time);
    if (st->key_renegotiate_time > st->key_lifetime) {
        cfgfatal(loc,"site",
                 "renegotiate-time must be less than key-lifetime\n");
    }

    st->log_events=string_list_to_word(dict_lookup(dict,"log-events"),
                                       log_event_table,"site");

    st->resolving_count=0;
    st->allow_send_prod=0;

    /* tunname is "<local><-><remote>": both names, the three-character
       separator and the terminating NUL fit in the +5. */
    st->tunname=safe_malloc(strlen(st->localname)+strlen(st->remotename)+5,
                            "site_apply");
    sprintf(st->tunname,"%s<->%s",st->localname,st->remotename);

    /* The information we expect to see in incoming messages of type 1 */
    /* fixme: lots of unchecked overflows here, but the results are only
       corrupted packets rather than undefined behaviour */
    /* Used by we_have_priority as the tie-break for crossed MSG1s. */
    st->our_name_later=(strcmp(st->localname,st->remotename)>0);

    buffer_new(&st->buffer,SETUP_BUFFER_LEN);

    buffer_new(&st->scratch,SETUP_BUFFER_LEN);
    BUF_ALLOC(&st->scratch,"site:scratch");

    /* We are interested in poll(), but only for timeouts. We don't have
       any fds of our own. */
    register_for_poll(st, site_beforepoll, site_afterpoll, "site");
    st->timeout=0;

    st->remote_capabilities=0;
    st->chosen_transform=0;
    st->current.key_timeout=0;
    st->auxiliary_key.key_timeout=0;
    transport_peers_clear(st,&st->peers);
    transport_peers_clear(st,&st->setup_peers);
    /* XXX mlock these */
    st->dhsecret=safe_malloc(st->dh->len,"site:dhsecret");
    st->sharedsecretlen=st->sharedsecretallocd=0;
    st->sharedsecret=0;

    /* Advertise one capability bit per configured transform; a bit
       claimed by more than one transform is logged as an error. */
    for (i=0; i<st->ntransforms; i++) {
        struct transform_if *ti=st->transforms[i];
        uint32_t capbit = 1UL << ti->capab_transformnum;
        if (st->local_capabilities & capbit)
            slog(st,LOG_ERROR,"transformnum capability bit"
                 " %d (%#"PRIx32") reused", ti->capab_transformnum, capbit);
        st->local_capabilities |= capbit;
    }

    if (st->local_mobile || st->peer_mobile)
        st->local_capabilities |= CAPAB_PRIORITY_MOBILE;

    /* We need to register the remote networks with the netlink device */
    uint32_t netlink_mtu; /* local virtual interface mtu */
    st->netlink->reg(st->netlink->st, site_outgoing, st, &netlink_mtu);
    if (!st->mtu_target)
        st->mtu_target=netlink_mtu;
    
    /* Ask every comm to hand incoming packets to site_incoming. */
    for (i=0; i<st->ncomms; i++)
        st->comms[i]->request_notify(st->comms[i]->st, st, site_incoming);

    st->current.transform=0;
    st->auxiliary_key.transform=0;
    st->new_transform=0;
    st->auxiliary_is_new=0;

    enter_state_stop(st);

    add_hook(PHASE_SHUTDOWN,site_phase_hook,st);
    add_hook(PHASE_CHILDPERSIST,site_childpersist_clearkeys,st);

    return new_closure(&st->cl);
}
2257
2258 void site_module(dict_t *dict)
2259 {
2260     add_closure(dict,"site",site_apply);
2261 }
2262
2263
2264 /***** TRANSPORT PEERS definitions *****/
2265
2266 static void transport_peers_debug(struct site *st, transport_peers *dst,
2267                                   const char *didwhat,
2268                                   int nargs, const struct comm_addr *args,
2269                                   size_t stride) {
2270     int i;
2271     char *argp;
2272
2273     if (!(st->log_events & LOG_PEER_ADDRS))
2274         return; /* an optimisation */
2275
2276     slog(st, LOG_PEER_ADDRS, "peers (%s) %s nargs=%d => npeers=%d",
2277          (dst==&st->peers ? "data" :
2278           dst==&st->setup_peers ? "setup" : "UNKNOWN"),
2279          didwhat, nargs, dst->npeers);
2280
2281     for (i=0, argp=(void*)args;
2282          i<nargs;
2283          i++, (argp+=stride?stride:sizeof(*args))) {
2284         const struct comm_addr *ca=(void*)argp;
2285         slog(st, LOG_PEER_ADDRS, " args: addrs[%d]=%s",
2286              i, comm_addr_to_string(ca));
2287     }
2288     for (i=0; i<dst->npeers; i++) {
2289         struct timeval diff;
2290         timersub(tv_now,&dst->peers[i].last,&diff);
2291         const struct comm_addr *ca=&dst->peers[i].addr;
2292         slog(st, LOG_PEER_ADDRS, " peers: addrs[%d]=%s T-%ld.%06ld",
2293              i, comm_addr_to_string(ca),
2294              (unsigned long)diff.tv_sec, (unsigned long)diff.tv_usec);
2295     }
2296 }
2297
2298 static void transport_peers_expire(struct site *st, transport_peers *peers) {
2299     /* peers must be sorted first */
2300     int previous_peers=peers->npeers;
2301     struct timeval oldest;
2302     oldest.tv_sec  = tv_now->tv_sec - st->mobile_peer_expiry;
2303     oldest.tv_usec = tv_now->tv_usec;
2304     while (peers->npeers>1 &&
2305            timercmp(&peers->peers[peers->npeers-1].last, &oldest, <))
2306         peers->npeers--;
2307     if (peers->npeers != previous_peers)
2308         transport_peers_debug(st,peers,"expire", 0,0,0);
2309 }
2310
2311 static bool_t transport_peer_record_one(struct site *st, transport_peers *peers,
2312                                         const struct comm_addr *ca,
2313                                         const struct timeval *tv) {
2314     /* returns false if output is full */
2315     int search;
2316
2317     if (peers->npeers >= st->transport_peers_max)
2318         return 0;
2319
2320     for (search=0; search<peers->npeers; search++)
2321         if (comm_addr_equal(&peers->peers[search].addr, ca))
2322             return 1;
2323
2324     peers->peers[peers->npeers].addr = *ca;
2325     peers->peers[peers->npeers].last = *tv;
2326     peers->npeers++;
2327     return 1;
2328 }
2329
2330 static void transport_record_peers(struct site *st, transport_peers *peers,
2331                                    const struct comm_addr *addrs, int naddrs,
2332                                    const char *m) {
2333     /* We add addrs into peers.  The new entries end up at the front
2334      * and displace entries towards the end (perhaps even off the
2335      * end).  Any existing matching entries are moved up to the front.
2336      *
2337      * Caller must first call transport_peers_expire. */
2338
2339     if (naddrs==1) {
2340         /* avoids debug for uninteresting updates */
2341         int i;
2342         for (i=0; i<peers->npeers; i++) {
2343             if (comm_addr_equal(&addrs[0], &peers->peers[i].addr)) {
2344                 memmove(peers->peers+1, peers->peers,
2345                         sizeof(peers->peers[0]) * i);
2346                 peers->peers[0].addr = addrs[0];
2347                 peers->peers[0].last = *tv_now;
2348                 return;
2349             }
2350         }
2351     }
2352
2353     int old_npeers=peers->npeers;
2354     transport_peer old_peers[old_npeers];
2355     COPY_ARRAY(old_peers,peers->peers,old_npeers);
2356
2357     peers->npeers=0;
2358     int i;
2359     for (i=0; i<naddrs; i++) {
2360         if (!transport_peer_record_one(st,peers, &addrs[i], tv_now))
2361             break;
2362     }
2363     for (i=0; i<old_npeers; i++) {
2364         const transport_peer *old=&old_peers[i];
2365         if (!transport_peer_record_one(st,peers, &old->addr, &old->last))
2366             break;
2367     }
2368
2369     transport_peers_debug(st,peers,m, naddrs,addrs,0);
2370 }
2371
2372 static void transport_expire_record_peers(struct site *st,
2373                                           transport_peers *peers,
2374                                           const struct comm_addr *addrs,
2375                                           int naddrs, const char *m) {
2376     /* Convenience function */
2377     transport_peers_expire(st,peers);
2378     transport_record_peers(st,peers,addrs,naddrs,m);
2379 }
2380
2381 static bool_t transport_compute_setupinit_peers(struct site *st,
2382         const struct comm_addr *configured_addrs /* 0 if none or not found */,
2383         int n_configured_addrs /* 0 if none or not found */,
2384         const struct comm_addr *incoming_packet_addr /* 0 if none */) {
2385     if (!n_configured_addrs && !incoming_packet_addr &&
2386         !transport_peers_valid(&st->peers))
2387         return False;
2388
2389     slog(st,LOG_SETUP_INIT,
2390          "using: %d configured addr(s);%s %d old peer addrs(es)",
2391          n_configured_addrs,
2392          incoming_packet_addr ? " incoming packet address;" : "",
2393          st->peers.npeers);
2394
2395     /* Non-mobile peers try addresses until one is plausible.  The
2396      * effect is that this code always tries first the configured
2397      * address if supplied, or otherwise the address of the incoming
2398      * PROD, or finally the existing data peer if one exists; this is
2399      * as desired. */
2400
2401     transport_peers_copy(st,&st->setup_peers,&st->peers);
2402     transport_peers_expire(st,&st->setup_peers);
2403
2404     if (incoming_packet_addr)
2405         transport_record_peers(st,&st->setup_peers,
2406                                incoming_packet_addr,1, "incoming");
2407
2408     if (n_configured_addrs)
2409         transport_record_peers(st,&st->setup_peers,
2410                               configured_addrs,n_configured_addrs, "setupinit");
2411
2412     assert(transport_peers_valid(&st->setup_peers));
2413     return True;
2414 }
2415
2416 static void transport_setup_msgok(struct site *st, const struct comm_addr *a) {
2417     if (st->peer_mobile)
2418         transport_expire_record_peers(st,&st->setup_peers,a,1,"setupmsg");
2419 }
2420 static void transport_data_msgok(struct site *st, const struct comm_addr *a) {
2421     if (st->peer_mobile)
2422         transport_expire_record_peers(st,&st->peers,a,1,"datamsg");
2423 }
2424
2425 static int transport_peers_valid(transport_peers *peers) {
2426     return peers->npeers;
2427 }
2428 static void transport_peers_clear(struct site *st, transport_peers *peers) {
2429     peers->npeers= 0;
2430     transport_peers_debug(st,peers,"clear",0,0,0);
2431 }
2432 static void transport_peers_copy(struct site *st, transport_peers *dst,
2433                                  const transport_peers *src) {
2434     dst->npeers=src->npeers;
2435     COPY_ARRAY(dst->peers, src->peers, dst->npeers);
2436     transport_peers_debug(st,dst,"copy",
2437                           src->npeers, &src->peers->addr, sizeof(*src->peers));
2438 }
2439
2440 static void transport_resolve_complete(struct site *st,
2441                                        const struct comm_addr *addrs,
2442                                        int naddrs) {
2443     transport_expire_record_peers(st,&st->peers,addrs,naddrs,
2444                                   "resolved data");
2445     transport_expire_record_peers(st,&st->setup_peers,addrs,naddrs,
2446                                   "resolved setup");
2447 }
2448
2449 static void transport_resolve_complete_tardy(struct site *st,
2450                                              const struct comm_addr *addrs,
2451                                              int naddrs) {
2452     transport_expire_record_peers(st,&st->peers,addrs,naddrs,
2453                                   "resolved tardily");
2454 }
2455
2456 static void transport_peers__copy_by_mask(transport_peer *out, int *nout_io,
2457                                           unsigned mask,
2458                                           const transport_peers *inp) {
2459     /* out and in->peers may be the same region, or nonoverlapping */
2460     const transport_peer *in=inp->peers;
2461     int slot;
2462     for (slot=0; slot<inp->npeers; slot++) {
2463         if (!(mask & (1U << slot)))
2464             continue;
2465         if (!(out==in && slot==*nout_io))
2466             COPY_OBJ(out[*nout_io], in[slot]);
2467         (*nout_io)++;
2468     }
2469 }
2470
2471 void transport_xmit(struct site *st, transport_peers *peers,
2472                     struct buffer_if *buf, bool_t candebug) {
2473     int slot;
2474     transport_peers_expire(st, peers);
2475     unsigned failed=0; /* bitmask */
2476     assert(MAX_PEER_ADDRS < sizeof(unsigned)*CHAR_BIT);
2477
2478     int nfailed=0;
2479     for (slot=0; slot<peers->npeers; slot++) {
2480         transport_peer *peer=&peers->peers[slot];
2481         bool_t ok = comm_addr_sendmsg(st, &peer->addr, buf);
2482         if (candebug)
2483             dump_packet(st, buf, &peer->addr, False, ok);
2484         if (!ok) {
2485             failed |= 1U << slot;
2486             nfailed++;
2487         }
2488         if (ok && !st->peer_mobile)
2489             break;
2490     }
2491     /* Now we need to demote/delete failing addrs: if we are mobile we
2492      * merely demote them; otherwise we delete them. */
2493     if (st->local_mobile) {
2494         unsigned expected = ((1U << nfailed)-1) << (peers->npeers-nfailed);
2495         /* `expected' has all the failures at the end already */
2496         if (failed != expected) {
2497             int fslot=0;
2498             transport_peer failedpeers[nfailed];
2499             transport_peers__copy_by_mask(failedpeers, &fslot, failed,peers);
2500             assert(fslot == nfailed);
2501             int wslot=0;
2502             transport_peers__copy_by_mask(peers->peers,&wslot,~failed,peers);
2503             assert(wslot+nfailed == peers->npeers);
2504             COPY_ARRAY(peers->peers+wslot, failedpeers, nfailed);
2505             transport_peers_debug(st,peers,"mobile failure reorder",0,0,0);
2506         }
2507     } else {
2508         if (failed && peers->npeers > 1) {
2509             int wslot=0;
2510             transport_peers__copy_by_mask(peers->peers,&wslot,~failed,peers);
2511             peers->npeers=wslot;
2512             transport_peers_debug(st,peers,"non-mobile failure cleanup",0,0,0);
2513         }
2514     }
2515 }
2516
2517 /***** END of transport peers declarations *****/