chiark / gitweb /
ce185d4d46aa03ee4e4c2d6b215deb88278351cf
[secnet.git] / site.c
1 /* site.c - manage communication with a remote network site */
2
3 /*
4  * This file is part of secnet.
5  * See README for full list of copyright holders.
6  *
7  * secnet is free software; you can redistribute it and/or modify it
8  * under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3 of the License, or
10  * (at your option) any later version.
11  * 
12  * secnet is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * version 3 along with secnet; if not, see
19  * https://www.gnu.org/licenses/gpl.html.
20  */
21
22 /* The 'site' code doesn't know anything about the structure of the
23    packets it's transmitting.  In fact, under the new netlink
24    configuration scheme it doesn't need to know anything at all about
25    IP addresses, except how to contact its peer.  This means it could
26    potentially be used to tunnel other protocols too (IPv6, IPX, plain
27    old Ethernet frames) if appropriate netlink code can be written
28    (and that ought not to be too hard, eg. using the TUN/TAP device to
29    pretend to be an Ethernet interface).  */
30
31 /* At some point in the future the netlink code will be asked for
32    configuration information to go in the PING/PONG packets at the end
33    of the key exchange. */
34
35 #include "secnet.h"
36 #include <stdio.h>
37 #include <string.h>
38 #include <limits.h>
39 #include <assert.h>
40 #include <sys/socket.h>
41
42 #include <sys/mman.h>
43 #include "util.h"
44 #include "unaligned.h"
45 #include "magic.h"
46
/* Length of the key-setup buffers (presumably in bytes; the place where
   it is used is outside this part of the file -- confirm). */
47 #define SETUP_BUFFER_LEN 2048
48
/* Default timing and retry parameters.  Durations are in milliseconds,
   as marked on each line; DEFAULT_SETUP_RETRIES is a plain count. */
49 #define DEFAULT_KEY_LIFETIME                  (3600*1000) /* [ms] */
50 #define DEFAULT_KEY_RENEGOTIATE_GAP           (5*60*1000) /* [ms] */
51 #define DEFAULT_SETUP_RETRIES 5
52 #define DEFAULT_SETUP_RETRY_INTERVAL             (2*1000) /* [ms] */
53 #define DEFAULT_WAIT_TIME                       (20*1000) /* [ms] */
54
/* Counterparts of the defaults above carrying the MOBILE infix
   (presumably chosen when a mobile end is involved; the code that
   selects between the two sets is not visible here -- confirm). */
55 #define DEFAULT_MOBILE_KEY_LIFETIME      (2*24*3600*1000) /* [ms] */
56 #define DEFAULT_MOBILE_KEY_RENEGOTIATE_GAP (12*3600*1000) /* [ms] */
57 #define DEFAULT_MOBILE_SETUP_RETRIES 30
58 #define DEFAULT_MOBILE_SETUP_RETRY_INTERVAL      (1*1000) /* [ms] */
59 #define DEFAULT_MOBILE_WAIT_TIME                (10*1000) /* [ms] */
60
/* Note the unit: this one is in seconds, unlike the values above. */
61 #define DEFAULT_MOBILE_PEER_EXPIRY            (2*60)      /* [s] */
62
63 /* Each site can be in one of several possible states. */
64
65 /* States:
66    SITE_STOP         - nothing is allowed to happen; tunnel is down;
67                        all session keys have been erased
68      -> SITE_RUN upon external instruction
69    SITE_RUN          - site up, maybe with valid key
70      -> SITE_RESOLVE upon outgoing packet and no valid key
71          we start name resolution for the other end of the tunnel
72      -> SITE_SENTMSG2 upon valid incoming message 1 and suitable time
73          we send an appropriate message 2
74    SITE_RESOLVE      - waiting for name resolution
75      -> SITE_SENTMSG1 upon successful resolution
76          we send an appropriate message 1
77      -> SITE_SENTMSG2 upon valid incoming message 1 (then abort resolution)
78          we abort resolution and send an appropriate message 2
79      -> SITE_WAIT on timeout or resolution failure
80    SITE_SENTMSG1
81      -> SITE_SENTMSG2 upon valid incoming message 1 from higher priority end
82      -> SITE_SENTMSG3 upon valid incoming message 2
83      -> SITE_WAIT on timeout
84    SITE_SENTMSG2
85      -> SITE_SENTMSG4 upon valid incoming message 3
86      -> SITE_WAIT on timeout
87    SITE_SENTMSG3
88      -> SITE_SENTMSG5 upon valid incoming message 4
89      -> SITE_WAIT on timeout
90    SITE_SENTMSG4
91      -> SITE_RUN upon valid incoming message 5
92      -> SITE_WAIT on timeout
93    SITE_SENTMSG5
94      -> SITE_RUN upon valid incoming message 6
95      -> SITE_WAIT on timeout
96    SITE_WAIT         - failed to establish key; do nothing for a while
97      -> SITE_RUN on timeout
98    */
99
/* Numeric codes for the states described in the comment above.
   state_name() switches on these literal values (0..8), so the two
   must be kept in step. */
100 #define SITE_STOP     0
101 #define SITE_RUN      1
102 #define SITE_RESOLVE  2
103 #define SITE_SENTMSG1 3
104 #define SITE_SENTMSG2 4
105 #define SITE_SENTMSG3 5
106 #define SITE_SENTMSG4 6
107 #define SITE_SENTMSG5 7
108 #define SITE_WAIT     8
109
/* For use directly after the `case' keyword: `case CASES_MSG3_KNOWN:'
   expands to `case LABEL_MSG3: case LABEL_MSG3BIS:'. */
110 #define CASES_MSG3_KNOWN LABEL_MSG3: case LABEL_MSG3BIS
111
/* Forward declaration; struct msg is defined further down this file. */
112 struct msg;
113
/* Not static, so visible to other translation units.  NOTE(review):
   presumably an upper bound on start-of-buffer padding needed by this
   module; its consumers are not visible here -- confirm. */
114 int32_t site_max_start_pad = 4*4;
115
116 static cstring_t state_name(uint32_t state)
117 {
118     switch (state) {
119     case 0: return "STOP";
120     case 1: return "RUN";
121     case 2: return "RESOLVE";
122     case 3: return "SENTMSG1";
123     case 4: return "SENTMSG2";
124     case 5: return "SENTMSG3";
125     case 6: return "SENTMSG4";
126     case 7: return "SENTMSG5";
127     case 8: return "WAIT";
128     default: return "*bad state*";
129     }
130 }
131
/* Length in bytes of each key-exchange nonce (it sizes the uint8_t
   arrays localN/remoteN in struct site). */
132 #define NONCELEN 8
133
/* Log event classes.  Each is a single bit so they can be OR-ed into
   the log_events mask of a site; event_log_priority() tests an event
   against that mask.  Bit 0x00000200 is not assigned here. */
134 #define LOG_UNEXPECTED    0x00000001
135 #define LOG_SETUP_INIT    0x00000002
136 #define LOG_SETUP_TIMEOUT 0x00000004
137 #define LOG_ACTIVATE_KEY  0x00000008
138 #define LOG_TIMEOUT_KEY   0x00000010
139 #define LOG_SEC           0x00000020
140 #define LOG_STATE         0x00000040
141 #define LOG_DROP          0x00000080
142 #define LOG_DUMP          0x00000100
143 #define LOG_ERROR         0x00000400
144 #define LOG_PEER_ADDRS    0x00000800
145 #define LOG_SIGKEYS       0x00001000
146
/* Maps textual event names to LOG_* bit masks.  "default" and "all"
   are composite entries covering several bits; the table ends with a
   { NULL, 0 } sentinel.  (Presumably consumed when parsing the site's
   configuration; the consumer is not visible in this part of the
   file.) */
147 static struct flagstr log_event_table[]={
148     { "unexpected", LOG_UNEXPECTED },
149     { "setup-init", LOG_SETUP_INIT },
150     { "setup-timeout", LOG_SETUP_TIMEOUT },
151     { "activate-key", LOG_ACTIVATE_KEY },
152     { "timeout-key", LOG_TIMEOUT_KEY },
153     { "security", LOG_SEC },
154     { "state-change", LOG_STATE },
155     { "packet-drop", LOG_DROP },
156     { "dump-packets", LOG_DUMP },
157     { "errors", LOG_ERROR },
158     { "peer-addrs", LOG_PEER_ADDRS },
159     { "sigkeys", LOG_SIGKEYS },
160     { "default", LOG_SETUP_INIT|LOG_SETUP_TIMEOUT|
161       LOG_ACTIVATE_KEY|LOG_TIMEOUT_KEY|LOG_SEC|LOG_ERROR|LOG_SIGKEYS },
162     { "all", 0xffffffff },
163     { NULL, 0 }
164 };
165
166
167 /***** TRANSPORT PEERS declarations *****/
168
169 /* Details of "mobile peer" semantics:
170
171    - We use the same data structure for the different configurations,
172      but manage it with different algorithms.
173    
174    - We record up to mobile_peers_max peer address/port numbers
175      ("peers") for key setup, and separately up to mobile_peers_max
176      for data transfer.
177
178    - In general, we make a new set of addrs (see below) when we start
179      a new key exchange; the key setup addrs become the data transport
180      addrs when key setup completes.
181
182    If our peer is mobile:
183
184    - We send to all recent addresses of incoming packets, plus
185      initially all configured addresses (which we also expire).
186
187    - So, we record addrs of good incoming packets, as follows:
188       1. expire any peers last seen >120s ("mobile-peer-expiry") ago
189       2. add the peer of the just received packet to the applicable list
190          (possibly evicting the oldest entries to make room)
191      NB that we do not expire peers until an incoming packet arrives.
192
193    - If the peer has a configured address or name, we record them the
194      same way, but only as a result of our own initiation of key
195      setup.  (We might evict some incoming packet addrs to make room.)
196
197    - The default number of addrs to keep is 3, or 4 if we have a
198      configured name or address.  That's space for two configured
199      addresses (one IPv6 and one IPv4), plus two received addresses.
200
201    - Outgoing packets are sent to every recorded address in the
202      applicable list.  Any unsupported[1] addresses are deleted from
203      the list right away.  (This should only happen to configured
204      addresses, of course, but there is no need to check that.)
205
206    - When we successfully complete a key setup, we merge the key setup
207      peers into the data transfer peers.
208
209    [1] An unsupported address is one for whose AF we don't have a
210      socket (perhaps because we got EAFNOSUPPORT or some such) or for
211      which sendto gives ENETUNREACH.
212
213    If neither end is mobile:
214
215    - When peer initiated the key exchange, we use the incoming packet
216      address.
217
218    - When we initiate the key exchange, we try configured addresses
219      until we get one which isn't unsupported then fixate on that.
220
221    - When we complete a key setup, we replace the data transport peers
222      with those from the key setup.
223
224    If we are mobile:
225
226    - We can't tell when local network setup changes so we can't cache
227      the unsupported addrs and completely remove the spurious calls to
228      sendto, but we can optimise things a bit by deprioritising addrs
229      which seem to be unsupported.
230
231    - Use only configured addresses.  (Except, that if our peer
232      initiated a key exchange we use the incoming packet address until
233      our name resolution completes.)
234
235    - When we send a packet, try each address in turn; if addr
236      supported, put that address to the end of the list for future
237      packets, and go onto the next address.
238
239    - When we complete a key setup, we replace the data transport peers
240      with those from the key setup.
241
242    */
243
/* One known address of the peer, together with a timestamp.
   (Presumably `last' is when the address was last seen or used, per
   the expiry rules described above -- confirm against the code that
   maintains it.) */
244 typedef struct {
245     struct timeval last;
246     struct comm_addr addr;
247 } transport_peer;
248
/* A set of peer addresses: the first `npeers' entries of `peers' are
   in use; capacity is MAX_PEER_ADDRS.  Currently holds runtime state
   only; the "configuration information" heading is an empty
   placeholder. */
249 typedef struct {
250 /* configuration information */
251 /* runtime information */
252     int npeers;
253     transport_peer peers[MAX_PEER_ADDRS];
254 } transport_peers;
255
/* Forward declarations for the transport-peers helpers; their
   definitions are not in this part of the file. */
256 /* Basic operations on transport peer address sets */
257 static void transport_peers_clear(struct site *st, transport_peers *peers);
258 static int transport_peers_valid(transport_peers *peers);
259 static void transport_peers_copy(struct site *st, transport_peers *dst,
260                                  const transport_peers *src);
261
262 /* Record address of incoming setup packet; resp. data packet. */
263 static void transport_setup_msgok(struct site *st, const struct comm_addr *a);
264 static void transport_data_msgok(struct site *st, const struct comm_addr *a);
265
266 /* Initialise the setup addresses.  Called before we send the first
267  * packet in a key exchange.  If we are the initiator, as a result of
268  * resolve completing (or being determined not to be relevant) or an
269  * incoming PROD; if we are the responder, as a result of the MSG1. */
270 static bool_t transport_compute_setupinit_peers(struct site *st,
271         const struct comm_addr *configured_addrs /* 0 if none or not found */,
272         int n_configured_addrs /* 0 if none or not found */,
273         const struct comm_addr *incoming_packet_addr /* 0 if none */);
274
275 /* Called if we are the responder in a key setup, when the resolve
276  * completes.  transport_compute_setupinit_peers will have been called
277  * earlier.  If _complete is called, we are still doing the key setup
278  * (and we should use the new values for both the rest of the key
279  * setup and the ongoing data exchange); if _tardy is called, the key
280  * setup is done (either completed or not) and only the data peers are
281  * relevant. */
282 static void transport_resolve_complete(struct site *st,
283         const struct comm_addr *addrs, int naddrs);
284 static void transport_resolve_complete_tardy(struct site *st,
285         const struct comm_addr *addrs, int naddrs);
286
/* Transmit `buf' using the given address set.  (What `candebug'
   controls is not visible here -- see the definition.) */
287 static void transport_xmit(struct site *st, transport_peers *peers,
288                            struct buffer_if *buf, bool_t candebug);
289
290  /***** END of transport peers declarations *****/
291
292
/* One data-transfer key: the keyed transform instance plus its
   bookkeeping.  struct site holds two of these (`current' and
   `auxiliary_key'). */
293 struct data_key {
294     struct transform_inst_if *transform; /* 0 when there is no key;
                                              see is_transform_valid() */
295     uint64_t key_timeout; /* End of life of current key */
296     uint32_t remote_session_id; /* NOTE(review): presumably the peer's
                                     index for this session -- confirm */
297 };
298
/* All state for one tunnel to a remote site.  The fields fall into
   three groups, in order: configuration, runtime state of the
   established session, and state of the key-setup exchange currently
   in progress (at most one at a time, per the comment below). */
299 struct site {
300     closure_t cl;
301     struct site_if ops;
302 /* configuration information */
303     string_t localname;
304     string_t remotename;
305     bool_t keepalive;
306     bool_t local_mobile, peer_mobile; /* Mobile client support */
307     int32_t transport_peers_max;
308     string_t tunname; /* localname<->remotename by default, used in logs */
309     cstring_t *addresses; /* DNS name or address(es) for bootstrapping, optional */
310     int remoteport; /* Port for bootstrapping, optional */
311     uint32_t mtu_target;
312     struct netlink_if *netlink;
313     struct comm_if **comms;
314     struct comm_clientinfo **commclientinfos;
315     int ncomms;
316     struct resolver_if *resolver;
317     struct log_if *log;
318     struct random_if *random;
319     struct sigprivkey_if *privkey;
320     struct sigpubkey_if *pubkey;
321     struct transform_if **transforms;
322     int ntransforms;
323     struct dh_if *dh;
324
325     uint32_t index; /* Index of this site */
326     uint32_t early_capabilities;
327     uint32_t local_capabilities;
328     int32_t setup_retries; /* How many times to send setup packets */
329     int32_t setup_retry_interval; /* Initial timeout for setup packets */
330     int32_t wait_timeout_mean; /* How long to wait if setup unsuccessful */
331     int32_t mobile_peer_expiry; /* How long to remember 2ary addresses */
332     int32_t key_lifetime; /* How long a key lasts once set up */
333     int32_t key_renegotiate_time; /* If we see traffic (or a keepalive)
334                                       after this time, initiate a new
335                                       key exchange */
336
337     bool_t our_name_later; /* our name > peer name */
338     uint32_t log_events; /* mask of LOG_* bits; events not in the
                              mask are not logged */
339
340 /* runtime information */
341     uint32_t state; /* one of the SITE_* codes */
342     uint64_t now; /* Most recently seen time */
343     bool_t allow_send_prod;
344     bool_t msg1_crossed_logged;
345     int resolving_count;
346     int resolving_n_results_all;
347     int resolving_n_results_stored;
348     struct comm_addr resolving_results[MAX_PEER_ADDRS];
349
350     /* The currently established session */
351     struct data_key current;
352     struct data_key auxiliary_key;
353     bool_t auxiliary_is_new;
354     uint64_t renegotiate_key_time; /* When we can negotiate a new key */
355     uint64_t auxiliary_renegotiate_key_time;
356     transport_peers peers; /* Current address(es) of peer for data traffic */
357
358     /* The current key setup protocol exchange.  We can only be
359        involved in one of these at a time.  There's a potential for
360        denial of service here (the attacker keeps sending a setup
361        packet; we keep trying to continue the exchange, and have to
362        timeout before we can listen for another setup packet); perhaps
363        we should keep a list of 'bad' sources for setup packets. */
364     uint32_t remote_capabilities;
365     uint16_t remote_adv_mtu;
366     struct transform_if *chosen_transform;
367     uint32_t setup_session_id;
368     transport_peers setup_peers;
369     uint8_t localN[NONCELEN]; /* Nonces for key exchange */
370     uint8_t remoteN[NONCELEN];
371     struct buffer_if buffer; /* Current outgoing key exchange packet */
372     struct buffer_if scratch;
373     int32_t retries; /* Number of retries remaining */
374     uint64_t timeout; /* Timeout for current state */
375     uint8_t *dhsecret;
376     uint8_t *sharedsecret; /* grown on demand by set_new_transform() */
377     uint32_t sharedsecretlen, sharedsecretallocd; /* bytes in use;
                                                      bytes allocated */
378     struct transform_inst_if *new_transform; /* For key setup/verify */
379 };
380
381 static uint32_t event_log_priority(struct site *st, uint32_t event)
382 {
383     if (!(event&st->log_events))
384         return 0;
385     switch(event) {
386     case LOG_UNEXPECTED:    return M_INFO;
387     case LOG_SETUP_INIT:    return M_INFO;
388     case LOG_SETUP_TIMEOUT: return M_NOTICE;
389     case LOG_ACTIVATE_KEY:  return M_INFO;
390     case LOG_TIMEOUT_KEY:   return M_INFO;
391     case LOG_SEC:           return M_SECURITY;
392     case LOG_STATE:         return M_DEBUG;
393     case LOG_DROP:          return M_DEBUG;
394     case LOG_DUMP:          return M_DEBUG;
395     case LOG_ERROR:         return M_ERR;
396     case LOG_PEER_ADDRS:    return M_DEBUG;
397     case LOG_SIGKEYS:       return M_INFO;
398     default:                return M_ERR;
399     }
400 }
401
402 static uint32_t slog_start(struct site *st, uint32_t event)
403 {
404     uint32_t class=event_log_priority(st, event);
405     if (class) {
406         slilog_part(st->log,class,"%s: ",st->tunname);
407     }
408     return class;
409 }
410
411 static void vslog(struct site *st, uint32_t event, cstring_t msg, va_list ap)
412 FORMAT(printf,3,0);
413 static void vslog(struct site *st, uint32_t event, cstring_t msg, va_list ap)
414 {
415     uint32_t class;
416
417     class=slog_start(st,event);
418     if (class) {
419         vslilog_part(st->log,class,msg,ap);
420         slilog_part(st->log,class,"\n");
421     }
422 }
423
424 static void slog(struct site *st, uint32_t event, cstring_t msg, ...)
425 FORMAT(printf,3,4);
426 static void slog(struct site *st, uint32_t event, cstring_t msg, ...)
427 {
428     va_list ap;
429     va_start(ap,msg);
430     vslog(st,event,msg,ap);
431     va_end(ap);
432 }
433
434 static void logtimeout(struct site *st, const char *fmt, ...)
435 FORMAT(printf,2,3);
436 static void logtimeout(struct site *st, const char *fmt, ...)
437 {
438     uint32_t class=event_log_priority(st,LOG_SETUP_TIMEOUT);
439     if (!class)
440         return;
441
442     va_list ap;
443     va_start(ap,fmt);
444
445     slilog_part(st->log,class,"%s: ",st->tunname);
446     vslilog_part(st->log,class,fmt,ap);
447
448     const char *delim;
449     int i;
450     for (i=0, delim=" (tried ";
451          i<st->setup_peers.npeers;
452          i++, delim=", ") {
453         transport_peer *peer=&st->setup_peers.peers[i];
454         const char *s=comm_addr_to_string(&peer->addr);
455         slilog_part(st->log,class,"%s%s",delim,s);
456     }
457
458     slilog_part(st->log,class,")\n");
459     va_end(ap);
460 }
461
/* Forward declarations of functions defined later in this file. */
462 static void set_link_quality(struct site *st);
463 static void delete_keys(struct site *st, cstring_t reason, uint32_t loglevel);
464 static void delete_one_key(struct site *st, struct data_key *key,
465                            const char *reason /* may be 0 meaning don't log*/,
466                            const char *which /* ignored if !reason */,
467                            uint32_t loglevel /* ignored if !reason */);
468 static bool_t initiate_key_setup(struct site *st, cstring_t reason,
469                                  const struct comm_addr *prod_hint);
470 static void enter_state_run(struct site *st);
471 static bool_t enter_state_resolve(struct site *st);
472 static void decrement_resolving_count(struct site *st, int by);
473 static bool_t enter_new_state(struct site *st,uint32_t next,
474                               const struct msg *prompt
475                               /* may be 0 for SENTMSG1 */);
476 static void enter_state_wait(struct site *st);
477 static void activate_new_key(struct site *st);
478
479 static bool_t is_transform_valid(struct transform_inst_if *transform)
480 {
481     return transform && transform->valid(transform->st);
482 }
483
484 static bool_t current_valid(struct site *st)
485 {
486     return is_transform_valid(st->current.transform);
487 }
488
/* Generates call_transform_<fwdrev>(), a guarded wrapper around the
   transform method of the same name.  If `transform' is null or not
   valid, the wrapper sets *errmsg to "transform not set up" and
   returns transform_apply_err without calling the method; otherwise
   it returns whatever the method returns.  `st' is accepted but not
   used by the generated body. */
489 #define DEFINE_CALL_TRANSFORM(fwdrev)                                   \
490 static transform_apply_return                                           \
491 call_transform_##fwdrev(struct site *st,                                \
492                                    struct transform_inst_if *transform, \
493                                    struct buffer_if *buf,               \
494                                    const char **errmsg)                 \
495 {                                                                       \
496     if (!is_transform_valid(transform)) {                               \
497         *errmsg="transform not set up";                                 \
498         return transform_apply_err;                                     \
499     }                                                                   \
500     return transform->fwdrev(transform->st,buf,errmsg);                 \
501 }
502
/* Instantiate call_transform_forwards() and call_transform_reverse(). */
503 DEFINE_CALL_TRANSFORM(forwards)
504 DEFINE_CALL_TRANSFORM(reverse)
505
506 static void dispose_transform(struct transform_inst_if **transform_var)
507 {
508     struct transform_inst_if *transform=*transform_var;
509     if (transform) {
510         transform->delkey(transform->st);
511         transform->destroy(transform->st);
512     }
513     *transform_var = 0;
514 }    
515
/* Parsing guards.  Each one makes the *enclosing function* return
   False when its condition fails, so they may only be used inside
   functions returning a boolean.
     CHECK_AVAIL(b,l)  at least l bytes remain in buffer b
     CHECK_EMPTY(b)    buffer b has been consumed completely
     CHECK_TYPE(b,t)   the next uint32 in b (which is consumed) equals t

   CHECK_TYPE's temporary has a deliberately unusual name.  It used to
   be called `type', which captured a caller's argument of the same
   name: CHECK_TYPE(msg,type) then compared the value just read with
   itself and could never fail. */
#define CHECK_AVAIL(b,l) do { if ((b)->size<(l)) return False; } while(0)
#define CHECK_EMPTY(b) do { if ((b)->size!=0) return False; } while(0)
#define CHECK_TYPE(b,t) do { uint32_t check_type_got_; \
    CHECK_AVAIL((b),4); \
    check_type_got_=buf_unprepend_uint32((b)); \
    if (check_type_got_!=(t)) return False; } while(0)
522
523 static _Bool type_is_msg34(uint32_t type)
524 {
525     switch (type) {
526         case CASES_MSG3_KNOWN: case LABEL_MSG4: return True;
527         default: return False;
528     }
529 }
530
/* A site name as extracted from a packet by unpick_name().  `name'
   points into the packet buffer and is NOT nul-terminated; `len' is
   the name length up to, and excluding, any embedded nul byte.
   `extrainfo' is a read-only view of the bytes following that nul, or
   an empty view if there was none. */
531 struct parsedname {
532     int32_t len;
533     uint8_t *name;
534     struct buffer_if extrainfo;
535 };
536
/* Fields of a key-setup message as parsed by unpick_msg().  Pointer
   members point into the packet buffer that was parsed.  Which fields
   are filled in depends on the message type: unpick_msg() returns
   early for PROD, MSG1 and MSG2, leaving the later fields unset. */
537 struct msg {
538     uint8_t *hashstart; /* start of the packet as passed to unpick_msg */
539     uint32_t dest;
540     uint32_t source;
541     struct parsedname remote; /* first name in the packet */
542     struct parsedname local;  /* second name in the packet */
543     uint32_t remote_capabilities; /* 0 if the packet carried none */
544     uint16_t remote_mtu;          /* 0 if the packet carried none */
545     int capab_transformnum; /* -1 until set for a MSG3-family packet */
546     uint8_t *nR; /* first nonce in the packet, NONCELEN bytes */
547     uint8_t *nL; /* second nonce in the packet, NONCELEN bytes */
548     int32_t pklen;
549     char *pk; /* pklen bytes, not nul-terminated by the parser */
550     int32_t hashlen; /* bytes from hashstart to the end of pk */
551     struct alg_msg_data sig;
552 };
553
554 static int32_t wait_timeout(struct site *st) {
555     int32_t t = st->wait_timeout_mean;
556     int8_t factor;
557     if (t < INT_MAX/2) {
558         st->random->generate(st->random->st,sizeof(factor),&factor);
559         t += (t / 256) * factor;
560     }
561     return t;
562 }
563
564 static _Bool set_new_transform(struct site *st, char *pk)
565 {
566     _Bool ok;
567
568     /* Make room for the shared key */
569     st->sharedsecretlen=st->chosen_transform->keylen?:st->dh->ceil_len;
570     assert(st->sharedsecretlen);
571     if (st->sharedsecretlen > st->sharedsecretallocd) {
572         st->sharedsecretallocd=st->sharedsecretlen;
573         st->sharedsecret=safe_realloc_ary(st->sharedsecret,1,
574                                           st->sharedsecretallocd,
575                                           "site:sharedsecret");
576     }
577
578     /* Generate the shared key */
579     st->dh->makeshared(st->dh->st,st->dhsecret,st->dh->len,pk,
580                        st->sharedsecret,st->sharedsecretlen);
581
582     /* Set up the transform */
583     struct transform_if *generator=st->chosen_transform;
584     struct transform_inst_if *generated=generator->create(generator->st);
585     ok = generated->setkey(generated->st,st->sharedsecret,
586                            st->sharedsecretlen,st->our_name_later);
587
588     dispose_transform(&st->new_transform);
589     if (!ok) return False;
590     st->new_transform=generated;
591
592     slog(st,LOG_SETUP_INIT,"key exchange negotiated transform"
593          " %d (capabilities ours=%#"PRIx32" theirs=%#"PRIx32")",
594          st->chosen_transform->capab_bit,
595          st->local_capabilities, st->remote_capabilities);
596     return True;
597 }
598
/* Bookkeeping between append_string_xinfo_start() and
   append_string_xinfo_done(): buffer sizes recorded while a name with
   optional extra info is being appended.
     lenpos    buffer size before the string (where its length lives)
     afternul  buffer size just after the nul that follows the string */
599 struct xinfoadd {
600     int32_t lenpos, afternul;
601 };
602 static void append_string_xinfo_start(struct buffer_if *buf,
603                                       struct xinfoadd *xia,
604                                       const char *str)
605     /* Helps construct one of the names with additional info as found
606      * in MSG1..4.  Call this function first, then append all the
607      * desired extra info (not including the nul byte) to the buffer,
608      * then call append_string_xinfo_done. */
609 {
610     xia->lenpos = buf->size;
611     buf_append_string(buf,str);
612     buf_append_uint8(buf,0);
613     xia->afternul = buf->size;
614 }
615 static void append_string_xinfo_done(struct buffer_if *buf,
616                                      struct xinfoadd *xia)
617 {
618     /* we just need to adjust the string length */
619     if (buf->size == xia->afternul) {
620         /* no extra info, strip the nul too */
621         buf_unappend_uint8(buf);
622     } else {
623         put_uint16(buf->start+xia->lenpos, buf->size-(xia->lenpos+2));
624     }
625 }
626
627 /* Build any of msg1 to msg4. msg5 and msg6 are built from the inside
628    out using a transform of config data supplied by netlink */
/* The packet is assembled in st->buffer, in this order:
     destination session id (0 for MSG1), our index, message type,
     our name (with optional extra info), the remote name, our nonce,
     [MSG2 and later] the remote nonce,
     [MSG3/4 only] optional transform capability byte, our DH public
     value, and finally whatever privkey->sign() adds.
   Also resets st->retries to st->setup_retries.
   Returns True on success; False if hacky_par_mid_failnow() says so
   or signing fails.  `prompt' is not used by this function body. */
629 static bool_t generate_msg(struct site *st, uint32_t type, cstring_t what,
630                            const struct msg *prompt
631                            /* may be 0 for MSG1 */)
632 {
633     string_t dhpub;
634     unsigned minor;
635
636     st->retries=st->setup_retries;
637     BUF_ALLOC(&st->buffer,what);
638     buffer_init(&st->buffer,0);
639     buf_append_uint32(&st->buffer,
640         (type==LABEL_MSG1?0:st->setup_session_id));
641     buf_append_uint32(&st->buffer,st->index);
642     buf_append_uint32(&st->buffer,type);
643
        /* Our name, followed by extra info: capabilities (always,
           except in a MSG1 with no early capabilities) and, for
           MSG3/4, our MTU target. */
644     struct xinfoadd xia;
645     append_string_xinfo_start(&st->buffer,&xia,st->localname);
646     if ((st->local_capabilities & st->early_capabilities) ||
647         (type != LABEL_MSG1)) {
648         buf_append_uint32(&st->buffer,st->local_capabilities);
649     }
650     if (type_is_msg34(type)) {
651         buf_append_uint16(&st->buffer,st->mtu_target);
652     }
653     struct sigprivkey_if *privkey=st->privkey;
654     append_string_xinfo_done(&st->buffer,&xia);
655
656     buf_append_string(&st->buffer,st->remotename);
657     BUF_ADD_OBJ(append,&st->buffer,st->localN);
658     if (type==LABEL_MSG1) return True;
659     BUF_ADD_OBJ(append,&st->buffer,st->remoteN);
660     if (type==LABEL_MSG2) return True;
661
662     if (hacky_par_mid_failnow()) return False;
663
        /* MSG3 family with minor >= 1: name the chosen transform.
           The do/while(0) exists only so that `break' can skip it. */
664     if (MSGMAJOR(type) == 3) do {
665         minor = MSGMINOR(type);
666         if (minor < 1) break;
667         buf_append_uint8(&st->buffer,st->chosen_transform->capab_bit);
668     } while (0);
669
        /* makepublic() hands back a string which we must free */
670     dhpub=st->dh->makepublic(st->dh->st,st->dhsecret,st->dh->len);
671     buf_append_string(&st->buffer,dhpub);
672     free(dhpub);
673
        /* Sign everything assembled so far; the buffer is passed both
           as the data to sign and as the output argument. */
674     bool_t ok=privkey->sign(privkey->st,
675                             st->buffer.start,
676                             st->buffer.size,
677                             &st->buffer);
678     if (!ok) goto fail;
679     return True;
680
681  fail:
682     return False;
683 }
684
685 static bool_t unpick_name(struct buffer_if *msg, struct parsedname *nm)
686 {
687     CHECK_AVAIL(msg,2);
688     nm->len=buf_unprepend_uint16(msg);
689     CHECK_AVAIL(msg,nm->len);
690     nm->name=buf_unprepend(msg,nm->len);
691     uint8_t *nul=memchr(nm->name,0,nm->len);
692     if (!nul) {
693         buffer_readonly_view(&nm->extrainfo,0,0);
694     } else {
695         buffer_readonly_view(&nm->extrainfo, nul+1, msg->start-(nul+1));
696         nm->len=nul-nm->name;
697     }
698     return True;
699 }
700
/* Parse a key-setup packet of the given `type' from `msg' into *m,
   consuming the buffer as it goes.  Fields are read in wire order:
   dest, source, type, first name (with optional capabilities and, for
   MSG3/4, MTU in its extra info), second name, then depending on the
   type: nothing more (PROD), one nonce (MSG1), two nonces (MSG2), or
   two nonces, optional transform capability byte, public key and
   signature (later messages).
   Returns False if the packet is too short, has trailing bytes, or
   the signature cannot be unpicked.  No names, nonces or signatures
   are *verified* here.
   NOTE(review): CHECK_TYPE is invoked with an argument spelled `type',
   which is also the name of the temporary that macro declares, as
   defined earlier in this file; confirm that the type comparison is
   not vacuous. */
701 static bool_t unpick_msg(struct site *st, uint32_t type,
702                          struct buffer_if *msg, struct msg *m)
703 {
704     unsigned minor;
705
706     m->capab_transformnum=-1;
707     m->hashstart=msg->start;
708     CHECK_AVAIL(msg,4);
709     m->dest=buf_unprepend_uint32(msg);
710     CHECK_AVAIL(msg,4);
711     m->source=buf_unprepend_uint32(msg);
712     CHECK_TYPE(msg,type);
713     if (!unpick_name(msg,&m->remote)) return False;
714     m->remote_capabilities=0;
715     m->remote_mtu=0;
        /* Extra info is optional, but if any is present it must hold
           at least the 4-byte capabilities word. */
716     if (m->remote.extrainfo.size) {
717         CHECK_AVAIL(&m->remote.extrainfo,4);
718         m->remote_capabilities=buf_unprepend_uint32(&m->remote.extrainfo);
719     }
720     if (type_is_msg34(type) && m->remote.extrainfo.size) {
721         CHECK_AVAIL(&m->remote.extrainfo,2);
722         m->remote_mtu=buf_unprepend_uint16(&m->remote.extrainfo);
723     }
724     if (!unpick_name(msg,&m->local)) return False;
725     if (type==LABEL_PROD) {
726         CHECK_EMPTY(msg);
727         return True;
728     }
729     CHECK_AVAIL(msg,NONCELEN);
730     m->nR=buf_unprepend(msg,NONCELEN);
731     if (type==LABEL_MSG1) {
732         CHECK_EMPTY(msg);
733         return True;
734     }
735     CHECK_AVAIL(msg,NONCELEN);
736     m->nL=buf_unprepend(msg,NONCELEN);
737     if (type==LABEL_MSG2) {
738         CHECK_EMPTY(msg);
739         return True;
740     }
        /* MSG3 family: minor versions >= 1 carry a one-byte transform
           number; older ones imply CAPAB_BIT_ANCIENTTRANSFORM. */
741     if (MSGMAJOR(type) == 3) do {
742         minor = MSGMINOR(type);
743 #define MAYBE_READ_CAP(minminor, kind, dflt) do {                       \
744     if (minor < (minminor))                                             \
745         m->capab_##kind##num = (dflt);                                  \
746     else {                                                              \
747         CHECK_AVAIL(msg, 1);                                            \
748         m->capab_##kind##num = buf_unprepend_uint8(msg);                \
749     }                                                                   \
750 } while (0)
751         MAYBE_READ_CAP(1, transform, CAPAB_BIT_ANCIENTTRANSFORM);
752 #undef MAYBE_READ_CAP
753     } while (0);
754     CHECK_AVAIL(msg,2);
755     m->pklen=buf_unprepend_uint16(msg);
756     CHECK_AVAIL(msg,m->pklen);
757     m->pk=buf_unprepend(msg,m->pklen);
        /* Everything from the start of the packet up to here is what
           the signature covers. */
758     m->hashlen=msg->start-m->hashstart;
759
760     if (!st->pubkey->unpick(st->pubkey->st,msg,&m->sig)) {
761         return False;
762     }
763
764     CHECK_EMPTY(msg);
765
766     return True;
767 }
768
769 static bool_t name_matches(const struct parsedname *nm, const char *expected)
770 {
771     int expected_len=strlen(expected);
772     return
773         nm->len == expected_len &&
774         !memcmp(nm->name, expected, expected_len);
775 }    
776
777 static bool_t check_msg(struct site *st, uint32_t type, struct msg *m,
778                         cstring_t *error)
779 {
780     if (type==LABEL_MSG1) return True;
781
782     /* Check that the site names and our nonce have been sent
783        back correctly, and then store our peer's nonce. */ 
784     if (!name_matches(&m->remote,st->remotename)) {
785         *error="wrong remote site name";
786         return False;
787     }
788     if (!name_matches(&m->local,st->localname)) {
789         *error="wrong local site name";
790         return False;
791     }
792     if (memcmp(m->nL,st->localN,NONCELEN)!=0) {
793         *error="wrong locally-generated nonce";
794         return False;
795     }
796     if (type==LABEL_MSG2) return True;
797     if (!consttime_memeq(m->nR,st->remoteN,NONCELEN)) {
798         *error="wrong remotely-generated nonce";
799         return False;
800     }
801     /* MSG3 has complicated rules about capabilities, which are
802      * handled in process_msg3. */
803     if (MSGMAJOR(type) == 3) return True;
804     if (m->remote_capabilities!=st->remote_capabilities) {
805         *error="remote capabilities changed";
806         return False;
807     }
808     if (type==LABEL_MSG4) return True;
809     *error="unknown message type";
810     return False;
811 }
812
813 static bool_t kex_init(struct site *st)
814 {
815     st->random->generate(st->random->st,NONCELEN,st->localN);
816     return True;
817 }
818
819 static bool_t generate_msg1(struct site *st, const struct msg *prompt_maybe_0)
820 {
821     return
822         generate_msg(st,LABEL_MSG1,"site:MSG1",prompt_maybe_0);
823 }
824
825 static bool_t process_msg1(struct site *st, struct buffer_if *msg1,
826                            const struct comm_addr *src,
827                            const struct msg *m)
828 {
829     /* We've already determined we're in an appropriate state to
830        process an incoming MSG1, and that the MSG1 has correct values
831        of A and B. */
832
833     st->setup_session_id=m->source;
834     st->remote_capabilities=m->remote_capabilities;
835     memcpy(st->remoteN,m->nR,NONCELEN);
836     return True;
837 }
838
839 static bool_t generate_msg2(struct site *st,
840                             const struct msg *prompt_may_be_null)
841 {
842     return
843         generate_msg(st,LABEL_MSG2,"site:MSG2",prompt_may_be_null);
844 }
845
846 static bool_t process_msg2(struct site *st, struct buffer_if *msg2,
847                            const struct comm_addr *src,
848                            struct msg *m /* returned */)
849 {
850     cstring_t err;
851
852     if (!unpick_msg(st,LABEL_MSG2,msg2,m)) return False;
853     if (!check_msg(st,LABEL_MSG2,m,&err)) {
854         slog(st,LOG_SEC,"msg2: %s",err);
855         return False;
856     }
857     st->setup_session_id=m->source;
858     st->remote_capabilities=m->remote_capabilities;
859
860     /* Select the transform to use */
861
862     uint32_t remote_crypto_caps = st->remote_capabilities & CAPAB_TRANSFORM_MASK;
863     if (!remote_crypto_caps)
864         /* old secnets only had this one transform */
865         remote_crypto_caps = 1UL << CAPAB_BIT_ANCIENTTRANSFORM;
866
867 #define CHOOSE_CRYPTO(kind, whats) do {                                 \
868     struct kind##_if *iface;                                            \
869     uint32_t bit, ours = 0;                                             \
870     int i;                                                              \
871     for (i= 0; i < st->n##kind##s; i++) {                               \
872         iface=st->kind##s[i];                                           \
873         bit = 1UL << iface->capab_bit;                                  \
874         if (bit & remote_crypto_caps) goto kind##_found;                \
875         ours |= bit;                                                    \
876     }                                                                   \
877     slog(st,LOG_ERROR,"no " whats " in common"                          \
878          " (us %#"PRIx32"; them: %#"PRIx32")",                          \
879          st->local_capabilities & ours, remote_crypto_caps);            \
880     return False;                                                       \
881 kind##_found:                                                           \
882     st->chosen_##kind = iface;                                          \
883 } while (0)
884
885     CHOOSE_CRYPTO(transform, "transforms");
886
887 #undef CHOOSE_CRYPTO
888
889     memcpy(st->remoteN,m->nR,NONCELEN);
890     return True;
891 }
892
893 static bool_t generate_msg3(struct site *st, const struct msg *prompt)
894 {
895     /* Now we have our nonce and their nonce. Think of a secret key,
896        and create message number 3. */
897     st->random->generate(st->random->st,st->dh->len,st->dhsecret);
898     return generate_msg(st,
899                         (st->remote_capabilities & CAPAB_TRANSFORM_MASK)
900                         ? LABEL_MSG3BIS
901                         : LABEL_MSG3,
902                         "site:MSG3",prompt);
903 }
904
905 static bool_t process_msg3_msg4(struct site *st, struct msg *m)
906 {
907     /* Check signature and store g^x mod m */
908     if (!st->pubkey->check(st->pubkey->st,
909                            m->hashstart,m->hashlen,
910                            &m->sig)) {
911         slog(st,LOG_SEC,"msg3/msg4 signature failed check!");
912         return False;
913     }
914
915     st->remote_adv_mtu=m->remote_mtu;
916
917     return True;
918 }
919
920 static bool_t process_msg3(struct site *st, struct buffer_if *msg3,
921                            const struct comm_addr *src, uint32_t msgtype,
922                            struct msg *m /* returned */)
923 {
924     cstring_t err;
925
926     switch (msgtype) {
927         case CASES_MSG3_KNOWN: break;
928         default: assert(0);
929     }
930
931     if (!unpick_msg(st,msgtype,msg3,m)) return False;
932     if (!check_msg(st,msgtype,m,&err)) {
933         slog(st,LOG_SEC,"msg3: %s",err);
934         return False;
935     }
936     uint32_t capab_adv_late = m->remote_capabilities
937         & ~st->remote_capabilities & st->early_capabilities;
938     if (capab_adv_late) {
939         slog(st,LOG_SEC,"msg3 impermissibly adds early capability flag(s)"
940              " %#"PRIx32" (was %#"PRIx32", now %#"PRIx32")",
941              capab_adv_late, st->remote_capabilities, m->remote_capabilities);
942         return False;
943     }
944
945 #define CHOSE_CRYPTO(kind, what) do {                                   \
946     struct kind##_if *iface;                                            \
947     int i;                                                              \
948     for (i=0; i<st->n##kind##s; i++) {                                  \
949         iface=st->kind##s[i];                                           \
950         if (iface->capab_bit == m->capab_##kind##num)                   \
951             goto kind##_found;                                          \
952     }                                                                   \
953     slog(st,LOG_SEC,"peer chose unknown-to-us " what " %d!",            \
954          m->capab_##kind##num);                                                 \
955     return False;                                                       \
956 kind##_found:                                                           \
957     st->chosen_##kind=iface;                                            \
958 } while (0)
959
960     CHOSE_CRYPTO(transform, "transform");
961
962 #undef CHOSE_CRYPTO
963
964     if (!process_msg3_msg4(st,m))
965         return False;
966
967     /* Update our idea of the remote site's capabilities, now that we've
968      * verified that its message was authentic.
969      *
970      * Our previous idea of the remote site's capabilities came from the
971      * unauthenticated MSG1.  We've already checked that this new message
972      * doesn't change any of the bits we relied upon in the past, but it may
973      * also have set additional capability bits.  We simply throw those away
974      * now, and use the authentic capabilities from this MSG3. */
975     st->remote_capabilities=m->remote_capabilities;
976
977     /* Terminate their DH public key with a '0' */
978     m->pk[m->pklen]=0;
979     /* Invent our DH secret key */
980     st->random->generate(st->random->st,st->dh->len,st->dhsecret);
981
982     /* Generate the shared key and set up the transform */
983     if (!set_new_transform(st,m->pk)) return False;
984
985     return True;
986 }
987
988 static bool_t generate_msg4(struct site *st, const struct msg *prompt)
989 {
990     /* We have both nonces, their public key and our private key. Generate
991        our public key, sign it and send it to them. */
992     return generate_msg(st,LABEL_MSG4,"site:MSG4",prompt);
993 }
994
995 static bool_t process_msg4(struct site *st, struct buffer_if *msg4,
996                            const struct comm_addr *src,
997                            struct msg *m /* returned */)
998 {
999     cstring_t err;
1000
1001     if (!unpick_msg(st,LABEL_MSG4,msg4,m)) return False;
1002     if (!check_msg(st,LABEL_MSG4,m,&err)) {
1003         slog(st,LOG_SEC,"msg4: %s",err);
1004         return False;
1005     }
1006     
1007     if (!process_msg3_msg4(st,m))
1008         return False;
1009
1010     /* Terminate their DH public key with a '0' */
1011     m->pk[m->pklen]=0;
1012
1013     /* Generate the shared key and set up the transform */
1014     if (!set_new_transform(st,m->pk)) return False;
1015
1016     return True;
1017 }
1018
/* The three leading 32-bit words of a packet, in the order in which
 * unpick_msg0 removes them from the front of the buffer. */
struct msg0 {
    uint32_t dest;      /* first word */
    uint32_t source;    /* second word */
    uint32_t type;      /* third word */
};
1024
1025 static bool_t unpick_msg0(struct site *st, struct buffer_if *msg0,
1026                           struct msg0 *m)
1027 {
1028     CHECK_AVAIL(msg0,4);
1029     m->dest=buf_unprepend_uint32(msg0);
1030     CHECK_AVAIL(msg0,4);
1031     m->source=buf_unprepend_uint32(msg0);
1032     CHECK_AVAIL(msg0,4);
1033     m->type=buf_unprepend_uint32(msg0);
1034     return True;
1035     /* Leaves transformed part of buffer untouched */
1036 }
1037
1038 static bool_t generate_msg5(struct site *st, const struct msg *prompt)
1039 {
1040     cstring_t transform_err;
1041
1042     BUF_ALLOC(&st->buffer,"site:MSG5");
1043     /* We are going to add four words to the message */
1044     buffer_init(&st->buffer,calculate_max_start_pad());
1045     /* Give the netlink code an opportunity to put its own stuff in the
1046        message (configuration information, etc.) */
1047     buf_prepend_uint32(&st->buffer,LABEL_MSG5);
1048     if (call_transform_forwards(st,st->new_transform,
1049                                 &st->buffer,&transform_err))
1050         return False;
1051     buf_prepend_uint32(&st->buffer,LABEL_MSG5);
1052     buf_prepend_uint32(&st->buffer,st->index);
1053     buf_prepend_uint32(&st->buffer,st->setup_session_id);
1054
1055     st->retries=st->setup_retries;
1056     return True;
1057 }
1058
1059 static bool_t process_msg5(struct site *st, struct buffer_if *msg5,
1060                            const struct comm_addr *src,
1061                            struct transform_inst_if *transform)
1062 {
1063     struct msg0 m;
1064     cstring_t transform_err;
1065
1066     if (!unpick_msg0(st,msg5,&m)) return False;
1067
1068     if (call_transform_reverse(st,transform,msg5,&transform_err)) {
1069         /* There's a problem */
1070         slog(st,LOG_SEC,"process_msg5: transform: %s",transform_err);
1071         return False;
1072     }
1073     /* Buffer should now contain untransformed PING packet data */
1074     CHECK_AVAIL(msg5,4);
1075     if (buf_unprepend_uint32(msg5)!=LABEL_MSG5) {
1076         slog(st,LOG_SEC,"MSG5/PING packet contained wrong label");
1077         return False;
1078     }
1079     /* Older versions of secnet used to write some config data here
1080      * which we ignore.  So we don't CHECK_EMPTY */
1081     return True;
1082 }
1083
1084 static void create_msg6(struct site *st, struct transform_inst_if *transform,
1085                         uint32_t session_id)
1086 {
1087     cstring_t transform_err;
1088
1089     BUF_ALLOC(&st->buffer,"site:MSG6");
1090     /* We are going to add four words to the message */
1091     buffer_init(&st->buffer,calculate_max_start_pad());
1092     /* Give the netlink code an opportunity to put its own stuff in the
1093        message (configuration information, etc.) */
1094     buf_prepend_uint32(&st->buffer,LABEL_MSG6);
1095     transform_apply_return problem =
1096         call_transform_forwards(st,transform,
1097                                 &st->buffer,&transform_err);
1098     assert(!problem);
1099     buf_prepend_uint32(&st->buffer,LABEL_MSG6);
1100     buf_prepend_uint32(&st->buffer,st->index);
1101     buf_prepend_uint32(&st->buffer,session_id);
1102 }
1103
1104 static bool_t generate_msg6(struct site *st, const struct msg *prompt)
1105 {
1106     if (!is_transform_valid(st->new_transform))
1107         return False;
1108     create_msg6(st,st->new_transform,st->setup_session_id);
1109     st->retries=1; /* Peer will retransmit MSG5 if this packet gets lost */
1110     return True;
1111 }
1112
1113 static bool_t process_msg6(struct site *st, struct buffer_if *msg6,
1114                            const struct comm_addr *src)
1115 {
1116     struct msg0 m;
1117     cstring_t transform_err;
1118
1119     if (!unpick_msg0(st,msg6,&m)) return False;
1120
1121     if (call_transform_reverse(st,st->new_transform,msg6,&transform_err)) {
1122         /* There's a problem */
1123         slog(st,LOG_SEC,"process_msg6: transform: %s",transform_err);
1124         return False;
1125     }
1126     /* Buffer should now contain untransformed PING packet data */
1127     CHECK_AVAIL(msg6,4);
1128     if (buf_unprepend_uint32(msg6)!=LABEL_MSG6) {
1129         slog(st,LOG_SEC,"MSG6/PONG packet contained invalid data");
1130         return False;
1131     }
1132     /* Older versions of secnet used to write some config data here
1133      * which we ignore.  So we don't CHECK_EMPTY */
1134     return True;
1135 }
1136
1137 static transform_apply_return
1138 decrypt_msg0(struct site *st, struct buffer_if *msg0,
1139                            const struct comm_addr *src)
1140 {
1141     cstring_t transform_err, auxkey_err, newkey_err="n/a";
1142     struct msg0 m;
1143     transform_apply_return problem;
1144
1145     if (!unpick_msg0(st,msg0,&m)) return False;
1146
1147     /* Keep a copy so we can try decrypting it with multiple keys */
1148     buffer_copy(&st->scratch, msg0);
1149
1150     problem = call_transform_reverse(st,st->current.transform,
1151                                      msg0,&transform_err);
1152     if (!problem) {
1153         if (!st->auxiliary_is_new)
1154             delete_one_key(st,&st->auxiliary_key,
1155                            "peer has used new key","auxiliary key",LOG_SEC);
1156         return 0;
1157     }
1158     if (transform_apply_return_badseq(problem))
1159         goto badseq;
1160
1161     buffer_copy(msg0, &st->scratch);
1162     problem = call_transform_reverse(st,st->auxiliary_key.transform,
1163                                      msg0,&auxkey_err);
1164     if (!problem) {
1165         slog(st,LOG_DROP,"processing packet which uses auxiliary key");
1166         if (st->auxiliary_is_new) {
1167             /* We previously timed out in state SENTMSG5 but it turns
1168              * out that our peer did in fact get our MSG5 and is
1169              * using the new key.  So we should switch to it too. */
1170             /* This is a bit like activate_new_key. */
1171             struct data_key t;
1172             t=st->current;
1173             st->current=st->auxiliary_key;
1174             st->auxiliary_key=t;
1175
1176             delete_one_key(st,&st->auxiliary_key,"peer has used new key",
1177                            "previous key",LOG_SEC);
1178             st->auxiliary_is_new=0;
1179             st->renegotiate_key_time=st->auxiliary_renegotiate_key_time;
1180         }
1181         return 0;
1182     }
1183     if (transform_apply_return_badseq(problem))
1184         goto badseq;
1185
1186     if (st->state==SITE_SENTMSG5) {
1187         buffer_copy(msg0, &st->scratch);
1188         problem = call_transform_reverse(st,st->new_transform,
1189                                          msg0,&newkey_err);
1190         if (!problem) {
1191             /* It looks like we didn't get the peer's MSG6 */
1192             /* This is like a cut-down enter_new_state(SITE_RUN) */
1193             slog(st,LOG_STATE,"will enter state RUN (MSG0 with new key)");
1194             BUF_FREE(&st->buffer);
1195             st->timeout=0;
1196             activate_new_key(st);
1197             return 0; /* do process the data in this packet */
1198         }
1199         if (transform_apply_return_badseq(problem))
1200             goto badseq;
1201     }
1202
1203     slog(st,LOG_SEC,"transform: %s (aux: %s, new: %s)",
1204          transform_err,auxkey_err,newkey_err);
1205     initiate_key_setup(st,"incoming message would not decrypt",0);
1206     send_nak(src,m.dest,m.source,m.type,msg0,"message would not decrypt");
1207     assert(problem);
1208     return problem;
1209
1210  badseq:
1211     slog(st,LOG_DROP,"transform: %s (bad seq.)",transform_err);
1212     assert(problem);
1213     return problem;
1214 }
1215
1216 static bool_t process_msg0(struct site *st, struct buffer_if *msg0,
1217                            const struct comm_addr *src)
1218 {
1219     uint32_t type;
1220     transform_apply_return problem;
1221
1222     problem = decrypt_msg0(st,msg0,src);
1223     if (problem==transform_apply_seqdupe) {
1224         /* We recently received another copy of this packet, maybe due
1225          * to polypath.  That's not a problem; indeed, for the
1226          * purposes of transport address management it is a success.
1227          * But we don't want to process the packet. */
1228         transport_data_msgok(st,src);
1229         return False;
1230     }
1231     if (problem)
1232         return False;
1233
1234     CHECK_AVAIL(msg0,4);
1235     type=buf_unprepend_uint32(msg0);
1236     switch(type) {
1237     case LABEL_MSG7:
1238         /* We must forget about the current session. */
1239         delete_keys(st,"request from peer",LOG_SEC);
1240         /* probably, the peer is shutting down, and this is going to fail,
1241          * but we need to be trying to bring the link up again */
1242         if (st->keepalive)
1243             initiate_key_setup(st,"peer requested key teardown",0);
1244         return True;
1245     case LABEL_MSG9:
1246         /* Deliver to netlink layer */
1247         st->netlink->deliver(st->netlink->st,msg0);
1248         transport_data_msgok(st,src);
1249         /* See whether we should start negotiating a new key */
1250         if (st->now > st->renegotiate_key_time)
1251             initiate_key_setup(st,"incoming packet in renegotiation window",0);
1252         return True;
1253     default:
1254         slog(st,LOG_SEC,"incoming encrypted message of type %08x "
1255              "(unknown)",type);
1256         break;
1257     }
1258     return False;
1259 }
1260
1261 static void dump_packet(struct site *st, struct buffer_if *buf,
1262                         const struct comm_addr *addr, bool_t incoming,
1263                         bool_t ok)
1264 {
1265     uint32_t dest=get_uint32(buf->start);
1266     uint32_t source=get_uint32(buf->start+4);
1267     uint32_t msgtype=get_uint32(buf->start+8);
1268
1269     if (st->log_events & LOG_DUMP)
1270         slilog(st->log,M_DEBUG,"%s: %s: %08x<-%08x: %08x: %s%s",
1271                st->tunname,incoming?"incoming":"outgoing",
1272                dest,source,msgtype,comm_addr_to_string(addr),
1273                ok?"":" - fail");
1274 }
1275
1276 static bool_t comm_addr_sendmsg(struct site *st,
1277                                 const struct comm_addr *dest,
1278                                 struct buffer_if *buf)
1279 {
1280     int i;
1281     struct comm_clientinfo *commclientinfo = 0;
1282
1283     for (i=0; i < st->ncomms; i++) {
1284         if (st->comms[i] == dest->comm) {
1285             commclientinfo = st->commclientinfos[i];
1286             break;
1287         }
1288     }
1289     return dest->comm->sendmsg(dest->comm->st, buf, dest, commclientinfo);
1290 }
1291
/* Status query hook.  The argument is ignored and the result is
 * always 0. */
static uint32_t site_status(void *st)
{
    (void)st;
    return 0;
}
1296
1297 static bool_t send_msg(struct site *st)
1298 {
1299     if (st->retries>0) {
1300         transport_xmit(st, &st->setup_peers, &st->buffer, True);
1301         st->timeout=st->now+st->setup_retry_interval;
1302         st->retries--;
1303         return True;
1304     } else if (st->state==SITE_SENTMSG5) {
1305         logtimeout(st,"timed out sending MSG5, stashing new key");
1306         /* We stash the key we have produced, in case it turns out that
1307          * our peer did see our MSG5 after all and starts using it. */
1308         /* This is a bit like some of activate_new_key */
1309         struct transform_inst_if *t;
1310         t=st->auxiliary_key.transform;
1311         st->auxiliary_key.transform=st->new_transform;
1312         st->new_transform=t;
1313         dispose_transform(&st->new_transform);
1314
1315         st->auxiliary_is_new=1;
1316         st->auxiliary_key.key_timeout=st->now+st->key_lifetime;
1317         st->auxiliary_renegotiate_key_time=st->now+st->key_renegotiate_time;
1318         st->auxiliary_key.remote_session_id=st->setup_session_id;
1319
1320         enter_state_wait(st);
1321         return False;
1322     } else {
1323         logtimeout(st,"timed out sending key setup packet "
1324             "(in state %s)",state_name(st->state));
1325         enter_state_wait(st);
1326         return False;
1327     }
1328 }
1329
1330 static void site_resolve_callback(void *sst, const struct comm_addr *addrs,
1331                                   int stored_naddrs, int all_naddrs,
1332                                   const char *address, const char *failwhy)
1333 {
1334     struct site *st=sst;
1335
1336     if (!stored_naddrs) {
1337         slog(st,LOG_ERROR,"resolution of %s failed: %s",address,failwhy);
1338     } else {
1339         slog(st,LOG_PEER_ADDRS,"resolution of %s completed, %d addrs, eg: %s",
1340              address, all_naddrs, comm_addr_to_string(&addrs[0]));;
1341
1342         int space=st->transport_peers_max-st->resolving_n_results_stored;
1343         int n_tocopy=MIN(stored_naddrs,space);
1344         COPY_ARRAY(st->resolving_results + st->resolving_n_results_stored,
1345                    addrs,
1346                    n_tocopy);
1347         st->resolving_n_results_stored += n_tocopy;
1348         st->resolving_n_results_all += all_naddrs;
1349     }
1350
1351     decrement_resolving_count(st,1);
1352 }
1353
1354 static void decrement_resolving_count(struct site *st, int by)
1355 {
1356     assert(st->resolving_count>0);
1357     st->resolving_count-=by;
1358
1359     if (st->resolving_count)
1360         return;
1361
1362     /* OK, we are done with them all.  Handle combined results. */
1363
1364     const struct comm_addr *addrs=st->resolving_results;
1365     int naddrs=st->resolving_n_results_stored;
1366     assert(naddrs<=st->transport_peers_max);
1367
1368     if (naddrs) {
1369         if (naddrs != st->resolving_n_results_all) {
1370             slog(st,LOG_SETUP_INIT,"resolution of supplied addresses/names"
1371                  " yielded too many results (%d > %d), some ignored",
1372                  st->resolving_n_results_all, naddrs);
1373         }
1374         slog(st,LOG_STATE,"resolution completed, %d addrs, eg: %s",
1375              naddrs, iaddr_to_string(&addrs[0].ia));;
1376     }
1377
1378     switch (st->state) {
1379     case SITE_RESOLVE:
1380         if (transport_compute_setupinit_peers(st,addrs,naddrs,0)) {
1381             enter_new_state(st,SITE_SENTMSG1,0);
1382         } else {
1383             /* Can't figure out who to try to to talk to */
1384             slog(st,LOG_SETUP_INIT,
1385                  "key exchange failed: cannot find peer address");
1386             enter_state_run(st);
1387         }
1388         break;
1389     case SITE_SENTMSG1: case SITE_SENTMSG2:
1390     case SITE_SENTMSG3: case SITE_SENTMSG4:
1391     case SITE_SENTMSG5:
1392         if (naddrs) {
1393             /* We start using the address immediately for data too.
1394              * It's best to store it in st->peers now because we might
1395              * go via SENTMSG5, WAIT, and a MSG0, straight into using
1396              * the new key (without updating the data peer addrs). */
1397             transport_resolve_complete(st,addrs,naddrs);
1398         } else if (st->local_mobile) {
1399             /* We can't let this rest because we may have a peer
1400              * address which will break in the future. */
1401             slog(st,LOG_SETUP_INIT,"resolution failed: "
1402                  "abandoning key exchange");
1403             enter_state_wait(st);
1404         } else {
1405             slog(st,LOG_SETUP_INIT,"resolution failed: "
1406                  " continuing to use source address of peer's packets"
1407                  " for key exchange and ultimately data");
1408         }
1409         break;
1410     case SITE_RUN:
1411         if (naddrs) {
1412             slog(st,LOG_SETUP_INIT,"resolution completed tardily,"
1413                  " updating peer address(es)");
1414             transport_resolve_complete_tardy(st,addrs,naddrs);
1415         } else if (st->local_mobile) {
1416             /* Not very good.  We should queue (another) renegotiation
1417              * so that we can update the peer address. */
1418             st->key_renegotiate_time=st->now+wait_timeout(st);
1419         } else {
1420             slog(st,LOG_SETUP_INIT,"resolution failed: "
1421                  " continuing to use source address of peer's packets");
1422         }
1423         break;
1424     case SITE_WAIT:
1425     case SITE_STOP:
1426         /* oh well */
1427         break;
1428     }
1429 }
1430
1431 static bool_t initiate_key_setup(struct site *st, cstring_t reason,
1432                                  const struct comm_addr *prod_hint)
1433 {
1434     /* Reentrancy hazard: can call enter_new_state/enter_state_* */
1435     if (st->state!=SITE_RUN) return False;
1436     slog(st,LOG_SETUP_INIT,"initiating key exchange (%s)",reason);
1437     if (st->addresses) {
1438         slog(st,LOG_SETUP_INIT,"resolving peer address(es)");
1439         return enter_state_resolve(st);
1440     } else if (transport_compute_setupinit_peers(st,0,0,prod_hint)) {
1441         return enter_new_state(st,SITE_SENTMSG1,0);
1442     }
1443     slog(st,LOG_SETUP_INIT,"key exchange failed: no address for peer");
1444     return False;
1445 }
1446
1447 static void activate_new_key(struct site *st)
1448 {
1449     struct transform_inst_if *t;
1450
1451     /* We have three transform instances, which we swap between old,
1452        active and setup */
1453     t=st->auxiliary_key.transform;
1454     st->auxiliary_key.transform=st->current.transform;
1455     st->current.transform=st->new_transform;
1456     st->new_transform=t;
1457     dispose_transform(&st->new_transform);
1458
1459     st->timeout=0;
1460     st->auxiliary_is_new=0;
1461     st->auxiliary_key.key_timeout=st->current.key_timeout;
1462     st->current.key_timeout=st->now+st->key_lifetime;
1463     st->renegotiate_key_time=st->now+st->key_renegotiate_time;
1464     transport_peers_copy(st,&st->peers,&st->setup_peers);
1465     st->current.remote_session_id=st->setup_session_id;
1466
1467     /* Compute the inter-site MTU.  This is min( our_mtu, their_mtu ).
1468      * But their mtu be unspecified, in which case we just use ours. */
1469     uint32_t intersite_mtu=
1470         MIN(st->mtu_target, st->remote_adv_mtu ?: ~(uint32_t)0);
1471     st->netlink->set_mtu(st->netlink->st,intersite_mtu);
1472
1473     slog(st,LOG_ACTIVATE_KEY,"new key activated"
1474          " (mtu ours=%"PRId32" theirs=%"PRId32" intersite=%"PRId32")",
1475          st->mtu_target, st->remote_adv_mtu, intersite_mtu);
1476     enter_state_run(st);
1477 }
1478
1479 static void delete_one_key(struct site *st, struct data_key *key,
1480                            cstring_t reason, cstring_t which, uint32_t loglevel)
1481 {
1482     if (!is_transform_valid(key->transform)) return;
1483     if (reason) slog(st,loglevel,"%s deleted (%s)",which,reason);
1484     dispose_transform(&key->transform);
1485     key->key_timeout=0;
1486 }
1487
1488 static void delete_keys(struct site *st, cstring_t reason, uint32_t loglevel)
1489 {
1490     if (current_valid(st)) {
1491         slog(st,loglevel,"session closed (%s)",reason);
1492
1493         delete_one_key(st,&st->current,0,0,0);
1494         set_link_quality(st);
1495     }
1496     delete_one_key(st,&st->auxiliary_key,0,0,0);
1497 }
1498
1499 static void state_assert(struct site *st, bool_t ok)
1500 {
1501     if (!ok) fatal("site:state_assert");
1502 }
1503
1504 static void enter_state_stop(struct site *st)
1505 {
1506     st->state=SITE_STOP;
1507     st->timeout=0;
1508     delete_keys(st,"entering state STOP",LOG_TIMEOUT_KEY);
1509     dispose_transform(&st->new_transform);
1510 }
1511
1512 static void set_link_quality(struct site *st)
1513 {
1514     uint32_t quality;
1515     if (current_valid(st))
1516         quality=LINK_QUALITY_UP;
1517     else if (st->state==SITE_WAIT || st->state==SITE_STOP)
1518         quality=LINK_QUALITY_DOWN;
1519     else if (st->addresses)
1520         quality=LINK_QUALITY_DOWN_CURRENT_ADDRESS;
1521     else if (transport_peers_valid(&st->peers))
1522         quality=LINK_QUALITY_DOWN_STALE_ADDRESS;
1523     else
1524         quality=LINK_QUALITY_DOWN;
1525
1526     st->netlink->set_quality(st->netlink->st,quality);
1527 }
1528
1529 static void enter_state_run(struct site *st)
1530 {
1531     slog(st,LOG_STATE,"entering state RUN%s",
1532          current_valid(st) ? " (keyed)" : " (unkeyed)");
1533     st->state=SITE_RUN;
1534     st->timeout=0;
1535
1536     st->setup_session_id=0;
1537     transport_peers_clear(st,&st->setup_peers);
1538     FILLZERO(st->localN);
1539     FILLZERO(st->remoteN);
1540     dispose_transform(&st->new_transform);
1541     memset(st->dhsecret,0,st->dh->len);
1542     if (st->sharedsecret) memset(st->sharedsecret,0,st->sharedsecretlen);
1543     set_link_quality(st);
1544
1545     if (st->keepalive && !current_valid(st))
1546         initiate_key_setup(st, "keepalive", 0);
1547 }
1548
1549 static bool_t ensure_resolving(struct site *st)
1550 {
1551     /* Reentrancy hazard: may call site_resolve_callback and hence
1552      * enter_new_state, enter_state_* and generate_msg*. */
1553     if (st->resolving_count)
1554         return True;
1555
1556     assert(st->addresses);
1557
1558     /* resolver->request might reentrantly call site_resolve_callback
1559      * which will decrement st->resolving, so we need to increment it
1560      * twice beforehand to prevent decrement from thinking we're
1561      * finished, and decrement it ourselves.  Alternatively if
1562      * everything fails then there are no callbacks due and we simply
1563      * set it to 0 and return false.. */
1564     st->resolving_n_results_stored=0;
1565     st->resolving_n_results_all=0;
1566     st->resolving_count+=2;
1567     const char **addrp=st->addresses;
1568     const char *address;
1569     bool_t anyok=False;
1570     for (; (address=*addrp++); ) {
1571         bool_t ok = st->resolver->request(st->resolver->st,address,
1572                                           st->remoteport,st->comms[0],
1573                                           site_resolve_callback,st);
1574         if (ok)
1575             st->resolving_count++;
1576         anyok|=ok;
1577     }
1578     if (!anyok) {
1579         st->resolving_count=0;
1580         return False;
1581     }
1582     decrement_resolving_count(st,2);
1583     return True;
1584 }
1585
1586 static bool_t enter_state_resolve(struct site *st)
1587 {
1588     /* Reentrancy hazard!  See ensure_resolving. */
1589     state_assert(st,st->state==SITE_RUN);
1590     slog(st,LOG_STATE,"entering state RESOLVE");
1591     st->state=SITE_RESOLVE;
1592     return ensure_resolving(st);
1593 }
1594
1595 static bool_t enter_new_state(struct site *st, uint32_t next,
1596                               const struct msg *prompt
1597                               /* may be 0 for SENTMSG1 */)
1598 {
1599     bool_t (*gen)(struct site *st, const struct msg *prompt);
1600     int r;
1601
1602     slog(st,LOG_STATE,"entering state %s",state_name(next));
1603     switch(next) {
1604     case SITE_SENTMSG1:
1605         state_assert(st,st->state==SITE_RUN || st->state==SITE_RESOLVE);
1606         if (!kex_init(st)) return False;
1607         gen=generate_msg1;
1608         st->msg1_crossed_logged = False;
1609         break;
1610     case SITE_SENTMSG2:
1611         state_assert(st,st->state==SITE_RUN || st->state==SITE_RESOLVE ||
1612                      st->state==SITE_SENTMSG1 || st->state==SITE_WAIT);
1613         if (!kex_init(st)) return False;
1614         gen=generate_msg2;
1615         break;
1616     case SITE_SENTMSG3:
1617         state_assert(st,st->state==SITE_SENTMSG1);
1618         BUF_FREE(&st->buffer);
1619         gen=generate_msg3;
1620         break;
1621     case SITE_SENTMSG4:
1622         state_assert(st,st->state==SITE_SENTMSG2);
1623         BUF_FREE(&st->buffer);
1624         gen=generate_msg4;
1625         break;
1626     case SITE_SENTMSG5:
1627         state_assert(st,st->state==SITE_SENTMSG3);
1628         BUF_FREE(&st->buffer);
1629         gen=generate_msg5;
1630         break;
1631     case SITE_RUN:
1632         state_assert(st,st->state==SITE_SENTMSG4);
1633         BUF_FREE(&st->buffer);
1634         gen=generate_msg6;
1635         break;
1636     default:
1637         gen=NULL;
1638         fatal("enter_new_state(%s): invalid new state",state_name(next));
1639         break;
1640     }
1641
1642     if (hacky_par_start_failnow()) return False;
1643
1644     r= gen(st,prompt) && send_msg(st);
1645
1646     hacky_par_end(&r,
1647                   st->setup_retries, st->setup_retry_interval,
1648                   send_msg, st);
1649     
1650     if (r) {
1651         st->state=next;
1652         if (next==SITE_RUN) {
1653             BUF_FREE(&st->buffer); /* Never reused */
1654             st->timeout=0; /* Never retransmit */
1655             activate_new_key(st);
1656         }
1657         return True;
1658     }
1659     slog(st,LOG_ERROR,"error entering state %s",state_name(next));
1660     st->buffer.free=False; /* Unconditionally use the buffer; it may be
1661                               in either state, and enter_state_wait() will
1662                               do a BUF_FREE() */
1663     enter_state_wait(st);
1664     return False;
1665 }
1666
1667 /* msg7 tells our peer that we're about to forget our key */
1668 static bool_t send_msg7(struct site *st, cstring_t reason)
1669 {
1670     cstring_t transform_err;
1671
1672     if (current_valid(st) && st->buffer.free
1673         && transport_peers_valid(&st->peers)) {
1674         BUF_ALLOC(&st->buffer,"site:MSG7");
1675         buffer_init(&st->buffer,calculate_max_start_pad());
1676         buf_append_uint32(&st->buffer,LABEL_MSG7);
1677         buf_append_string(&st->buffer,reason);
1678         if (call_transform_forwards(st, st->current.transform,
1679                                     &st->buffer, &transform_err))
1680             goto free_out;
1681         buf_prepend_uint32(&st->buffer,LABEL_MSG0);
1682         buf_prepend_uint32(&st->buffer,st->index);
1683         buf_prepend_uint32(&st->buffer,st->current.remote_session_id);
1684         transport_xmit(st,&st->peers,&st->buffer,True);
1685         BUF_FREE(&st->buffer);
1686     free_out:
1687         return True;
1688     }
1689     return False;
1690 }
1691
1692 /* We go into this state if our peer becomes uncommunicative. Similar to
1693    the "stop" state, we forget all session keys for a while, before
1694    re-entering the "run" state. */
1695 static void enter_state_wait(struct site *st)
1696 {
1697     slog(st,LOG_STATE,"entering state WAIT");
1698     st->timeout=st->now+wait_timeout(st);
1699     st->state=SITE_WAIT;
1700     set_link_quality(st);
1701     BUF_FREE(&st->buffer); /* will have had an outgoing packet in it */
1702     /* XXX Erase keys etc. */
1703 }
1704
1705 static void generate_prod(struct site *st, struct buffer_if *buf)
1706 {
1707     buffer_init(buf,0);
1708     buf_append_uint32(buf,0);
1709     buf_append_uint32(buf,0);
1710     buf_append_uint32(buf,LABEL_PROD);
1711     buf_append_string(buf,st->localname);
1712     buf_append_string(buf,st->remotename);
1713 }
1714
1715 static void generate_send_prod(struct site *st,
1716                                const struct comm_addr *source)
1717 {
1718     if (!st->allow_send_prod) return; /* too soon */
1719     if (!(st->state==SITE_RUN || st->state==SITE_RESOLVE ||
1720           st->state==SITE_WAIT)) return; /* we'd ignore peer's MSG1 */
1721
1722     slog(st,LOG_SETUP_INIT,"prodding peer for key exchange");
1723     st->allow_send_prod=0;
1724     generate_prod(st,&st->scratch);
1725     bool_t ok = comm_addr_sendmsg(st, source, &st->scratch);
1726     dump_packet(st,&st->scratch,source,False,ok);
1727 }
1728
1729 static inline void site_settimeout(uint64_t timeout, int *timeout_io)
1730 {
1731     if (timeout) {
1732         int64_t offset=timeout-*now;
1733         if (offset<0) offset=0;
1734         if (offset>INT_MAX) offset=INT_MAX;
1735         if (*timeout_io<0 || offset<*timeout_io)
1736             *timeout_io=offset;
1737     }
1738 }
1739
1740 static int site_beforepoll(void *sst, struct pollfd *fds, int *nfds_io,
1741                            int *timeout_io)
1742 {
1743     struct site *st=sst;
1744
1745     BEFOREPOLL_WANT_FDS(0); /* We don't use any file descriptors */
1746     st->now=*now;
1747
1748     /* Work out when our next timeout is. The earlier of 'timeout' or
1749        'current.key_timeout'. A stored value of '0' indicates no timeout
1750        active. */
1751     site_settimeout(st->timeout, timeout_io);
1752     site_settimeout(st->current.key_timeout, timeout_io);
1753     site_settimeout(st->auxiliary_key.key_timeout, timeout_io);
1754
1755     return 0; /* success */
1756 }
1757
1758 static void check_expiry(struct site *st, struct data_key *key,
1759                          const char *which)
1760 {
1761     if (key->key_timeout && *now>key->key_timeout) {
1762         delete_one_key(st,key,"maximum life exceeded",which,LOG_TIMEOUT_KEY);
1763     }
1764 }
1765
1766 /* NB site_afterpoll will be called before site_beforepoll is ever called */
1767 static void site_afterpoll(void *sst, struct pollfd *fds, int nfds)
1768 {
1769     struct site *st=sst;
1770
1771     st->now=*now;
1772     if (st->timeout && *now>st->timeout) {
1773         st->timeout=0;
1774         if (st->state>=SITE_SENTMSG1 && st->state<=SITE_SENTMSG5) {
1775             if (!hacky_par_start_failnow())
1776                 send_msg(st);
1777         } else if (st->state==SITE_WAIT) {
1778             enter_state_run(st);
1779         } else {
1780             slog(st,LOG_ERROR,"site_afterpoll: unexpected timeout, state=%d",
1781                  st->state);
1782         }
1783     }
1784     check_expiry(st,&st->current,"current key");
1785     check_expiry(st,&st->auxiliary_key,"auxiliary key");
1786 }
1787
1788 /* This function is called by the netlink device to deliver packets
1789    intended for the remote network. The packet is in "raw" wire
1790    format, but is guaranteed to be word-aligned. */
1791 static void site_outgoing(void *sst, struct buffer_if *buf)
1792 {
1793     struct site *st=sst;
1794     cstring_t transform_err;
1795     
1796     if (st->state==SITE_STOP) {
1797         BUF_FREE(buf);
1798         return;
1799     }
1800
1801     st->allow_send_prod=1;
1802
1803     /* In all other states we consider delivering the packet if we have
1804        a valid key and a valid address to send it to. */
1805     if (current_valid(st) && transport_peers_valid(&st->peers)) {
1806         /* Transform it and send it */
1807         if (buf->size>0) {
1808             buf_prepend_uint32(buf,LABEL_MSG9);
1809             if (call_transform_forwards(st, st->current.transform,
1810                                         buf, &transform_err))
1811                 goto free_out;
1812             buf_prepend_uint32(buf,LABEL_MSG0);
1813             buf_prepend_uint32(buf,st->index);
1814             buf_prepend_uint32(buf,st->current.remote_session_id);
1815             transport_xmit(st,&st->peers,buf,False);
1816         }
1817     free_out:
1818         BUF_FREE(buf);
1819         return;
1820     }
1821
1822     slog(st,LOG_DROP,"discarding outgoing packet of size %d",buf->size);
1823     BUF_FREE(buf);
1824     initiate_key_setup(st,"outgoing packet",0);
1825 }
1826
1827 static bool_t named_for_us(struct site *st, const struct buffer_if *buf_in,
1828                            uint32_t type, struct msg *m,
1829                            struct priomsg *whynot)
1830     /* For packets which are identified by the local and remote names.
1831      * If it has our name and our peer's name in it it's for us. */
1832 {
1833     struct buffer_if buf[1];
1834     buffer_readonly_clone(buf,buf_in);
1835
1836     if (!unpick_msg(st,type,buf,m)) {
1837         priomsg_update_fixed(whynot, comm_notify_whynot_unpick, "malformed");
1838         return False;
1839     }
1840 #define NAME_MATCHES(lr)                                                \
1841     if (!name_matches(&m->lr, st->lr##name)) {                          \
1842         if (priomsg_update_fixed(whynot, comm_notify_whynot_name_##lr,  \
1843                                  "unknown " #lr " name: ")) {           \
1844             truncmsg_add_packet_string(&whynot->m, m->lr.len, m->lr.name); \
1845         }                                                               \
1846         return False;                                                   \
1847     }
1848     NAME_MATCHES(remote);
1849     NAME_MATCHES(local );
1850 #undef NAME_MATCHES
1851
1852     return True;
1853 }
1854
1855 static bool_t we_have_priority(struct site *st, const struct msg *m) {
1856     if (st->local_capabilities & m->remote_capabilities &
1857         CAPAB_PRIORITY_MOBILE) {
1858         if (st->local_mobile) return True;
1859         if (st-> peer_mobile) return False;
1860     }
1861     return st->our_name_later;
1862 }
1863
1864 static bool_t setup_late_msg_ok(struct site *st, 
1865                                 const struct buffer_if *buf_in,
1866                                 uint32_t msgtype,
1867                                 const struct comm_addr *source,
1868                                 struct msg *m /* returned */) {
1869     /* For setup packets which seem from their type like they are
1870      * late.  Maybe they came via a different path.  All we do is make
1871      * a note of the sending address, iff they look like they are part
1872      * of the current key setup attempt. */
1873     if (!named_for_us(st,buf_in,msgtype,m,0))
1874         /* named_for_us calls unpick_msg which gets the nonces */
1875         return False;
1876     if (!consttime_memeq(m->nR,st->remoteN,NONCELEN) ||
1877         !consttime_memeq(m->nL,st->localN, NONCELEN))
1878         /* spoof ?  from stale run ?  who knows */
1879         return False;
1880     transport_setup_msgok(st,source);
1881     return True;
1882 }
1883
1884 /* This function is called by the communication device to deliver
1885    packets from our peers.
1886    It should return True if the packet is recognised as being for
1887    this current site instance (and should therefore not be processed
1888    by other sites), even if the packet was otherwise ignored. */
     /* Header fields as read here: a 32-bit destination index at offset 0,
        a 32-bit word at offset 4 (compared with the current key's remote
        session id when handling a NAK) and the 32-bit message type at
        offset 8; packets shorter than 12 bytes are refused.
        MSG1 and PROD are matched to this site by the names they carry
        (see named_for_us); every other type is accepted only when the
        destination index equals st->index.  On every path that returns
        True the buffer has been released with BUF_FREE; when False is
        returned a reason may have been recorded in `whynot'. */
1889 static bool_t site_incoming(void *sst, struct buffer_if *buf,
1890                             const struct comm_addr *source,
1891                             struct priomsg *whynot)
1892 {
1893     struct site *st=sst;
1894
         /* Too short to contain the three header words read below. */
1895     if (buf->size < 12) return False;
1896
1897     uint32_t dest=get_uint32(buf->start);
1898     uint32_t msgtype=get_uint32(buf->start+8);
1899     struct msg msg;
1900       /* initialised by named_for_us, or process_msgN for N!=1 */
1901
1902     if (msgtype==LABEL_MSG1) {
1903         if (!named_for_us(st,buf,msgtype,&msg,whynot))
1904             return False;
1905         /* It's a MSG1 addressed to us. Decide what to do about it. */
1906         dump_packet(st,buf,source,True,True);
1907         if (st->state==SITE_RUN || st->state==SITE_RESOLVE ||
1908             st->state==SITE_WAIT) {
1909             /* We should definitely process it */
1910             transport_compute_setupinit_peers(st,0,0,source);
1911             if (process_msg1(st,buf,source,&msg)) {
1912                 slog(st,LOG_SETUP_INIT,"key setup initiated by peer");
1913                 bool_t entered=enter_new_state(st,SITE_SENTMSG2,&msg);
1914                 if (entered && st->addresses && st->local_mobile)
1915                     /* We must do this as the very last thing, because
1916                        the resolver callback might reenter us. */
1917                     ensure_resolving(st);
1918             } else {
1919                 slog(st,LOG_ERROR,"failed to process incoming msg1");
1920             }
1921             BUF_FREE(buf);
1922             return True;
1923         } else if (st->state==SITE_SENTMSG1) {
1924             /* We've just sent a message 1! They may have crossed on
1925                the wire. If we have priority then we ignore the
1926                incoming one, otherwise we process it as usual. */
1927             if (we_have_priority(st,&msg)) {
1928                 BUF_FREE(buf);
                     /* Log the crossing only once per MSG1 we send; the flag
                        is reset when SENTMSG1 is entered. */
1929                 if (!st->msg1_crossed_logged++)
1930                     slog(st,LOG_SETUP_INIT,"crossed msg1s; we are higher "
1931                          "priority => ignore incoming msg1");
1932                 return True;
1933             } else {
1934                 slog(st,LOG_SETUP_INIT,"crossed msg1s; we are lower "
1935                      "priority => use incoming msg1");
1936                 if (process_msg1(st,buf,source,&msg)) {
1937                     BUF_FREE(&st->buffer); /* Free our old message 1 */
1938                     transport_setup_msgok(st,source);
1939                     enter_new_state(st,SITE_SENTMSG2,&msg);
1940                 } else {
1941                     slog(st,LOG_ERROR,"failed to process an incoming "
1942                          "crossed msg1 (we have low priority)");
1943                 }
1944                 BUF_FREE(buf);
1945                 return True;
1946             }
1947         } else if (st->state==SITE_SENTMSG2 ||
1948                    st->state==SITE_SENTMSG4) {
1949             if (consttime_memeq(msg.nR,st->remoteN,NONCELEN)) {
1950                 /* We are ahead in the protocol, but that msg1 had the
1951                  * peer's nonce so presumably it is from this key
1952                  * exchange run, via a slower route */
1953                 transport_setup_msgok(st,source);
1954             } else {
1955                 slog(st,LOG_UNEXPECTED,"competing incoming message 1");
1956             }
1957             BUF_FREE(buf);
1958             return True;
1959         }
1960         /* The message 1 was received at an unexpected stage of the
1961            key setup.  Well, they lost the race. */
1962         slog(st,LOG_UNEXPECTED,"unexpected incoming message 1");
1963         BUF_FREE(buf);
1964         return True;
1965     }
1966     if (msgtype==LABEL_PROD) {
1967         if (!named_for_us(st,buf,msgtype,&msg,whynot))
1968             return False;
1969         dump_packet(st,buf,source,True,True);
             /* A PROD only triggers key setup when we are in state RUN
                and do not already hold a valid key. */
1970         if (st->state!=SITE_RUN) {
1971             slog(st,LOG_DROP,"ignoring PROD when not in state RUN");
1972         } else if (current_valid(st)) {
1973             slog(st,LOG_DROP,"ignoring PROD when we think we have a key");
1974         } else {
1975             initiate_key_setup(st,"peer sent PROD packet",source);
1976         }
1977         BUF_FREE(buf);
1978         return True;
1979     }
1980     if (dest==st->index) {
1981         /* Explicitly addressed to us */
1982         if (msgtype!=LABEL_MSG0) dump_packet(st,buf,source,True,True);
1983         switch (msgtype) {
1984         case LABEL_NAK:
1985             /* If the source is our current peer then initiate a key setup,
1986                because our peer's forgotten the key */
1987             if (get_uint32(buf->start+4)==st->current.remote_session_id) {
1988                 bool_t initiated;
1989                 initiated = initiate_key_setup(st,"received a NAK",source);
                     /* If we could not start a key setup ourselves, ask the
                        peer to start one instead. */
1990                 if (!initiated) generate_send_prod(st,source);
1991             } else {
1992                 slog(st,LOG_SEC,"bad incoming NAK");
1993             }
1994             break;
1995         case LABEL_MSG0:
1996             process_msg0(st,buf,source);
1997             break;
1998         case LABEL_MSG1:
1999             /* Setup packet: should not have been explicitly addressed
2000                to us */
2001             slog(st,LOG_SEC,"incoming explicitly addressed msg1");
2002             break;
2003         case LABEL_MSG2:
2004             /* Setup packet: expected only in state SENTMSG1 */
2005             if (st->state!=SITE_SENTMSG1) {
2006                 if ((st->state==SITE_SENTMSG3 ||
2007                      st->state==SITE_SENTMSG5) &&
2008                     setup_late_msg_ok(st,buf,msgtype,source,&msg))
2009                     break;
2010                 slog(st,LOG_UNEXPECTED,"unexpected MSG2");
2011             } else if (process_msg2(st,buf,source,&msg)) {
2012                 transport_setup_msgok(st,source);
2013                 enter_new_state(st,SITE_SENTMSG3,&msg);
2014             } else {
2015                 slog(st,LOG_SEC,"invalid MSG2");
2016             }
2017             break;
2018         case CASES_MSG3_KNOWN:
2019             /* Setup packet: expected only in state SENTMSG2 */
2020             if (st->state!=SITE_SENTMSG2) {
2021                 if ((st->state==SITE_SENTMSG4) &&
2022                     setup_late_msg_ok(st,buf,msgtype,source,&msg))
2023                     break;
2024                 slog(st,LOG_UNEXPECTED,"unexpected MSG3");
2025             } else if (process_msg3(st,buf,source,msgtype,&msg)) {
2026                 transport_setup_msgok(st,source);
2027                 enter_new_state(st,SITE_SENTMSG4,&msg);
2028             } else {
2029                 slog(st,LOG_SEC,"invalid MSG3");
2030             }
2031             break;
2032         case LABEL_MSG4:
2033             /* Setup packet: expected only in state SENTMSG3 */
2034             if (st->state!=SITE_SENTMSG3) {
2035                 if ((st->state==SITE_SENTMSG5) &&
2036                     setup_late_msg_ok(st,buf,msgtype,source,&msg))
2037                     break;
2038                 slog(st,LOG_UNEXPECTED,"unexpected MSG4");
2039             } else if (process_msg4(st,buf,source,&msg)) {
2040                 transport_setup_msgok(st,source);
2041                 enter_new_state(st,SITE_SENTMSG5,&msg);
2042             } else {
2043                 slog(st,LOG_SEC,"invalid MSG4");
2044             }
2045             break;
2046         case LABEL_MSG5:
2047             /* Setup packet: expected only in state SENTMSG4 */
2048             /* (may turn up in state RUN if our return MSG6 was lost
2049                and the new key has already been activated. In that
2050                case we discard it. The peer will realise that we
2051                are using the new key when they see our data packets.
2052                Until then the peer's data packets to us get discarded. */
2053             if (st->state==SITE_SENTMSG4) {
2054                 if (process_msg5(st,buf,source,st->new_transform)) {
2055                     transport_setup_msgok(st,source);
                         /* NOTE(review): msg is not passed to process_msg5
                            above, so it appears to be uninitialised here;
                            presumably generate_msg6 does not look at its
                            prompt argument - confirm. */
2056                     enter_new_state(st,SITE_RUN,&msg);
2057                 } else {
2058                     slog(st,LOG_SEC,"invalid MSG5");
2059                 }
2060             } else if (st->state==SITE_RUN) {
2061                 if (process_msg5(st,buf,source,st->current.transform)) {
2062                     slog(st,LOG_DROP,"got MSG5, retransmitting MSG6");
2063                     transport_setup_msgok(st,source);
2064                     create_msg6(st,st->current.transform,
2065                                 st->current.remote_session_id);
2066                     transport_xmit(st,&st->peers,&st->buffer,True);
2067                     BUF_FREE(&st->buffer);
2068                 } else {
2069                     slog(st,LOG_SEC,"invalid MSG5 (in state RUN)");
2070                 }
2071             } else {
2072                 slog(st,LOG_UNEXPECTED,"unexpected MSG5");
2073             }
2074             break;
2075         case LABEL_MSG6:
2076             /* Setup packet: expected only in state SENTMSG5 */
2077             if (st->state!=SITE_SENTMSG5) {
2078                 slog(st,LOG_UNEXPECTED,"unexpected MSG6");
2079             } else if (process_msg6(st,buf,source)) {
2080                 BUF_FREE(&st->buffer); /* Free message 5 */
2081                 transport_setup_msgok(st,source);
2082                 activate_new_key(st);
2083             } else {
2084                 slog(st,LOG_SEC,"invalid MSG6");
2085             }
2086             break;
2087         default:
2088             slog(st,LOG_SEC,"received message of unknown type 0x%08x",
2089                  msgtype);
2090             break;
2091         }
2092         BUF_FREE(buf);
2093         return True;
2094     }
2095
         /* Not for this site; the buffer is left untouched. */
2096     priomsg_update_fixed(whynot, comm_notify_whynot_general,
2097                          "not MSG1 or PROD; unknown dest index");
2098     return False;
2099 }
2100
2101 static void site_control(void *vst, bool_t run)
2102 {
2103     struct site *st=vst;
2104     if (run) enter_state_run(st);
2105     else enter_state_stop(st);
2106 }
2107
/* Phase hook (registered for PHASE_SHUTDOWN by site_apply).  The phase
   number itself is not examined. */
static void site_phase_hook(void *sst, uint32_t newphase)
{
    struct site *st=sst;

    (void)newphase;

    /* The program is shutting down; tell our peer */
    send_msg7(st, "shutting down");
}
2115
2116 static void site_childpersist_clearkeys(void *sst, uint32_t newphase)
2117 {
2118     struct site *st=sst;
2119     dispose_transform(&st->current.transform);
2120     dispose_transform(&st->auxiliary_key.transform);
2121     dispose_transform(&st->new_transform);
2122     /* Not much point overwiting the signing key, since we loaded it
2123        from disk, and it is only valid prospectively if at all,
2124        anyway. */
2125     /* XXX it would be best to overwrite the DH state, because that
2126        _is_ relevant to forward secrecy.  However we have no
2127        convenient interface for doing that and in practice gmp has
2128        probably dribbled droppings all over the malloc arena.  A good
2129        way to fix this would be to have a privsep child for asymmetric
2130        crypto operations, but that's a task for another day. */
2131 }
2132
2133 static void setup_sethash(struct site *st, dict_t *dict,
2134                           struct hash_if **hash, struct cloc loc,
2135                           sig_sethash_fn *sethash, void *sigkey_st) {
2136     if (!*hash) *hash=find_cl_if(dict,"hash",CL_HASH,True,"site",loc);
2137     sethash(sigkey_st,*hash);
2138 }
2139 #define SETUP_SETHASH(k) do{                                            \
2140     if ((k)->sethash)                                                   \
2141         setup_sethash(st,dict, &hash,loc, (k)->sethash,(k)->st);        \
2142 }while(0)
2143
     /* Closure "apply" function for "site": builds one struct site from the
        configuration dictionary passed as the first element of `args'.
        Returns NULL (after freeing the new structure) when the entry
        describes ourselves (local-name equals name) or when both ends are
        marked mobile.  Otherwise the site is registered with the poll loop,
        the netlink device and each comm, put into its initial state by
        enter_state_stop(), given shutdown and child-persist hooks, and
        returned wrapped by new_closure().  Configuration errors are
        reported through cfgfatal(). */
2144 static list_t *site_apply(closure_t *self, struct cloc loc, dict_t *context,
2145                           list_t *args)
2146 {
         /* Shared by all sites: each new site takes the next value as its
            index (the value that incoming packets' destination field is
            compared against). */
2147     static uint32_t index_sequence;
2148     struct site *st;
2149     item_t *item;
2150     dict_t *dict;
2151     int i;
2152
2153     NEW(st);
2154
2155     st->cl.description="site";
2156     st->cl.type=CL_SITE;
2157     st->cl.apply=NULL;
2158     st->cl.interface=&st->ops;
2159     st->ops.st=st;
2160     st->ops.control=site_control;
2161     st->ops.status=site_status;
2162
2163     /* First parameter must be a dict */
2164     item=list_elem(args,0);
2165     if (!item || item->type!=t_dict)
2166         cfgfatal(loc,"site","parameter must be a dictionary\n");
2167     
2168     dict=item->data.dict;
2169     st->localname=dict_read_string(dict, "local-name", True, "site", loc);
2170     st->remotename=dict_read_string(dict, "name", True, "site", loc);
2171
2172     st->keepalive=dict_read_bool(dict,"keepalive",False,"site",loc,False);
2173
2174     st->peer_mobile=dict_read_bool(dict,"mobile",False,"site",loc,False);
2175     st->local_mobile=
2176         dict_read_bool(dict,"local-mobile",False,"site",loc,False);
2177
2178     /* Sanity check (which also allows the 'sites' file to include
2179        site() closures for all sites including our own): refuse to
2180        talk to ourselves */
2181     if (strcmp(st->localname,st->remotename)==0) {
2182         Message(M_DEBUG,"site %s: local-name==name -> ignoring this site\n",
2183                 st->localname);
2184         if (st->peer_mobile != st->local_mobile)
2185             cfgfatal(loc,"site","site %s's peer-mobile=%d"
2186                     " but our local-mobile=%d\n",
2187                     st->localname, st->peer_mobile, st->local_mobile);
2188         free(st);
2189         return NULL;
2190     }
2191     if (st->peer_mobile && st->local_mobile) {
2192         Message(M_WARNING,"site %s: site is mobile but so are we"
2193                 " -> ignoring this site\n", st->remotename);
2194         free(st);
2195         return NULL;
2196     }
2197
2198     assert(index_sequence < 0xffffffffUL);
2199     st->index = ++index_sequence;
2200     st->local_capabilities = 0;
2201     st->early_capabilities = CAPAB_PRIORITY_MOBILE;
2202     st->netlink=find_cl_if(dict,"link",CL_NETLINK,True,"site",loc);
2203
     /* Read the mandatory config list `dictkey', check every element is a
        closure of type CL_TYPE, and store the closures' interfaces in the
        newly allocated array st->things with the count in st->nthings.
        An empty list trips the assert. */
2204 #define GET_CLOSURE_LIST(dictkey,things,nthings,CL_TYPE) do{            \
2205     list_t *things##_cfg=dict_lookup(dict,dictkey);                     \
2206     if (!things##_cfg)                                                  \
2207         cfgfatal(loc,"site","closure list \"%s\" not found\n",dictkey); \
2208     st->nthings=list_length(things##_cfg);                              \
2209     NEW_ARY(st->things,st->nthings);                                    \
2210     assert(st->nthings);                                                \
2211     for (i=0; i<st->nthings; i++) {                                     \
2212         item_t *item=list_elem(things##_cfg,i);                         \
2213         if (item->type!=t_closure)                                      \
2214             cfgfatal(loc,"site","%s is not a closure\n",dictkey);       \
2215         closure_t *cl=item->data.closure;                               \
2216         if (cl->type!=CL_TYPE)                                          \
2217             cfgfatal(loc,"site","%s closure wrong type\n",dictkey);     \
2218         st->things[i]=cl->interface;                                    \
2219     }                                                                   \
2220 }while(0)
2221
2222     GET_CLOSURE_LIST("comm",comms,ncomms,CL_COMM);
2223
         /* One client-info entry per comm; all null if there is no
            "comm-info" dictionary. */
2224     NEW_ARY(st->commclientinfos, st->ncomms);
2225     dict_t *comminfo = dict_read_dict(dict,"comm-info",False,"site",loc);
2226     for (i=0; i<st->ncomms; i++) {
2227         st->commclientinfos[i] =
2228             !comminfo ? 0 :
2229             st->comms[i]->clientinfo(st->comms[i],comminfo,loc);
2230     }
2231
2232     st->resolver=find_cl_if(dict,"resolver",CL_RESOLVER,True,"site",loc);
2233     st->log=find_cl_if(dict,"log",CL_LOG,True,"site",loc);
2234     st->random=find_cl_if(dict,"random",CL_RANDOMSRC,True,"site",loc);
2235
         /* Looked up lazily by SETUP_SETHASH, and only if a key needs it. */
2236     struct hash_if *hash=0;
2237     st->privkey=find_cl_if(dict,"local-key",CL_SIGPRIVKEY,True,"site",loc);
2238     st->addresses=dict_read_string_array(dict,"address",False,"site",loc,0);
         /* "port" is mandatory only when at least one address is given. */
2239     if (st->addresses)
2240         st->remoteport=dict_read_number(dict,"port",True,"site",loc,0);
2241     else st->remoteport=0;
2242     st->pubkey=find_cl_if(dict,"key",CL_SIGPUBKEY,True,"site",loc);
2243
2244     GET_CLOSURE_LIST("transform",transforms,ntransforms,CL_TRANSFORM);
2245
2246     st->dh=find_cl_if(dict,"dh",CL_DH,True,"site",loc);
2247
2248     SETUP_SETHASH(st->privkey);
2249     SETUP_SETHASH(st->pubkey);
2250
     /* DEFAULT(D) picks the mobile or the ordinary default for setting D,
        depending on whether either end is mobile.  Note that CFG_NUMBER's
        expansion already ends in a semicolon. */
2251 #define DEFAULT(D) (st->peer_mobile || st->local_mobile \
2252                     ? DEFAULT_MOBILE_##D : DEFAULT_##D)
2253 #define CFG_NUMBER(k,D) dict_read_number(dict,(k),False,"site",loc,DEFAULT(D));
2254
2255     st->key_lifetime=         CFG_NUMBER("key-lifetime",  KEY_LIFETIME);
2256     st->setup_retries=        CFG_NUMBER("setup-retries", SETUP_RETRIES);
2257     st->setup_retry_interval= CFG_NUMBER("setup-timeout", SETUP_RETRY_INTERVAL);
2258     st->wait_timeout_mean=    CFG_NUMBER("wait-time",     WAIT_TIME);
2259     st->mtu_target= dict_read_number(dict,"mtu-target",False,"site",loc,0);
2260
2261     st->mobile_peer_expiry= dict_read_number(
2262        dict,"mobile-peer-expiry",False,"site",loc,DEFAULT_MOBILE_PEER_EXPIRY);
2263
2264     const char *peerskey= st->peer_mobile
2265         ? "mobile-peers-max" : "static-peers-max";
2266     st->transport_peers_max= dict_read_number(
2267         dict,peerskey,False,"site",loc, st->addresses ? 4 : 3);
2268     if (st->transport_peers_max<1 ||
2269         st->transport_peers_max>MAX_PEER_ADDRS) {
2270         cfgfatal(loc,"site", "%s must be in range 1.."
2271                  STRING(MAX_PEER_ADDRS) "\n", peerskey);
2272     }
2273
         /* Default renegotiation point: the usual gap before expiry, or half
            the lifetime when the lifetime is shorter than twice that gap.
            An explicit "renegotiate-time" overrides it. */
2274     if (st->key_lifetime < DEFAULT(KEY_RENEGOTIATE_GAP)*2)
2275         st->key_renegotiate_time=st->key_lifetime/2;
2276     else
2277         st->key_renegotiate_time=st->key_lifetime-DEFAULT(KEY_RENEGOTIATE_GAP);
2278     st->key_renegotiate_time=dict_read_number(
2279         dict,"renegotiate-time",False,"site",loc,st->key_renegotiate_time);
         /* NOTE(review): equality is accepted here although the message
            says "less than". */
2280     if (st->key_renegotiate_time > st->key_lifetime) {
2281         cfgfatal(loc,"site",
2282                  "renegotiate-time must be less than key-lifetime\n");
2283     }
2284
2285     st->log_events=string_list_to_word(dict_lookup(dict,"log-events"),
2286                                        log_event_table,"site");
2287
2288     st->resolving_count=0;
2289     st->allow_send_prod=0;
2290
         /* "<->" is 3 characters and the terminating NUL one more, so +5
            leaves a spare byte. */
2291     st->tunname=safe_malloc(strlen(st->localname)+strlen(st->remotename)+5,
2292                             "site_apply");
2293     sprintf(st->tunname,"%s<->%s",st->localname,st->remotename);
2294
2295     /* The information we expect to see in incoming messages of type 1 */
2296     /* fixme: lots of unchecked overflows here, but the results are only
2297        corrupted packets rather than undefined behaviour */
2298     st->our_name_later=(strcmp(st->localname,st->remotename)>0);
2299
2300     buffer_new(&st->buffer,SETUP_BUFFER_LEN);
2301
2302     buffer_new(&st->scratch,SETUP_BUFFER_LEN);
2303     BUF_ALLOC(&st->scratch,"site:scratch");
2304
2305     /* We are interested in poll(), but only for timeouts. We don't have
2306        any fds of our own. */
2307     register_for_poll(st, site_beforepoll, site_afterpoll, "site");
2308     st->timeout=0;
2309
2310     st->remote_capabilities=0;
2311     st->chosen_transform=0;
2312     st->current.key_timeout=0;
2313     st->auxiliary_key.key_timeout=0;
2314     transport_peers_clear(st,&st->peers);
2315     transport_peers_clear(st,&st->setup_peers);
2316     /* XXX mlock these */
2317     st->dhsecret=safe_malloc(st->dh->len,"site:dhsecret");
2318     st->sharedsecretlen=st->sharedsecretallocd=0;
2319     st->sharedsecret=0;
2320
     /* Add capability bit number `bit' to st->local_capabilities, logging
        an error if that bit was already set. */
2321 #define SET_CAPBIT(bit) do {                                            \
2322     uint32_t capflag = 1UL << (bit);                                    \
2323     if (st->local_capabilities & capflag)                               \
2324         slog(st,LOG_ERROR,"capability bit"                              \
2325              " %d (%#"PRIx32") reused", (bit), capflag);                \
2326     st->local_capabilities |= capflag;                                  \
2327 } while (0)
2328
2329     for (i=0; i<st->ntransforms; i++)
2330         SET_CAPBIT(st->transforms[i]->capab_bit);
2331
2332 #undef SET_CAPBIT
2333
2334     if (st->local_mobile || st->peer_mobile)
2335         st->local_capabilities |= CAPAB_PRIORITY_MOBILE;
2336
2337     /* We need to register the remote networks with the netlink device */
2338     uint32_t netlink_mtu; /* local virtual interface mtu */
2339     st->netlink->reg(st->netlink->st, site_outgoing, st, &netlink_mtu);
         /* An unset (zero) mtu-target falls back to the netlink's MTU. */
2340     if (!st->mtu_target)
2341         st->mtu_target=netlink_mtu;
2342     
2343     for (i=0; i<st->ncomms; i++)
2344         st->comms[i]->request_notify(st->comms[i]->st, st, site_incoming);
2345
2346     st->current.transform=0;
2347     st->auxiliary_key.transform=0;
2348     st->new_transform=0;
2349     st->auxiliary_is_new=0;
2350
2351     enter_state_stop(st);
2352
2353     add_hook(PHASE_SHUTDOWN,site_phase_hook,st);
2354     add_hook(PHASE_CHILDPERSIST,site_childpersist_clearkeys,st);
2355
2356     return new_closure(&st->cl);
2357 }
2358
2359 void site_module(dict_t *dict)
2360 {
2361     add_closure(dict,"site",site_apply);
2362 }
2363
2364
2365 /***** TRANSPORT PEERS definitions *****/
2366
2367 static void transport_peers_debug(struct site *st, transport_peers *dst,
2368                                   const char *didwhat,
2369                                   int nargs, const struct comm_addr *args,
2370                                   size_t stride) {
2371     int i;
2372     char *argp;
2373
2374     if (!(st->log_events & LOG_PEER_ADDRS))
2375         return; /* an optimisation */
2376
2377     slog(st, LOG_PEER_ADDRS, "peers (%s) %s nargs=%d => npeers=%d",
2378          (dst==&st->peers ? "data" :
2379           dst==&st->setup_peers ? "setup" : "UNKNOWN"),
2380          didwhat, nargs, dst->npeers);
2381
2382     for (i=0, argp=(void*)args;
2383          i<nargs;
2384          i++, (argp+=stride?stride:sizeof(*args))) {
2385         const struct comm_addr *ca=(void*)argp;
2386         slog(st, LOG_PEER_ADDRS, " args: addrs[%d]=%s",
2387              i, comm_addr_to_string(ca));
2388     }
2389     for (i=0; i<dst->npeers; i++) {
2390         struct timeval diff;
2391         timersub(tv_now,&dst->peers[i].last,&diff);
2392         const struct comm_addr *ca=&dst->peers[i].addr;
2393         slog(st, LOG_PEER_ADDRS, " peers: addrs[%d]=%s T-%ld.%06ld",
2394              i, comm_addr_to_string(ca),
2395              (unsigned long)diff.tv_sec, (unsigned long)diff.tv_usec);
2396     }
2397 }
2398
2399 static void transport_peers_expire(struct site *st, transport_peers *peers) {
2400     /* peers must be sorted first */
2401     int previous_peers=peers->npeers;
2402     struct timeval oldest;
2403     oldest.tv_sec  = tv_now->tv_sec - st->mobile_peer_expiry;
2404     oldest.tv_usec = tv_now->tv_usec;
2405     while (peers->npeers>1 &&
2406            timercmp(&peers->peers[peers->npeers-1].last, &oldest, <))
2407         peers->npeers--;
2408     if (peers->npeers != previous_peers)
2409         transport_peers_debug(st,peers,"expire", 0,0,0);
2410 }
2411
2412 static bool_t transport_peer_record_one(struct site *st, transport_peers *peers,
2413                                         const struct comm_addr *ca,
2414                                         const struct timeval *tv) {
2415     /* returns false if output is full */
2416     int search;
2417
2418     if (peers->npeers >= st->transport_peers_max)
2419         return 0;
2420
2421     for (search=0; search<peers->npeers; search++)
2422         if (comm_addr_equal(&peers->peers[search].addr, ca))
2423             return 1;
2424
2425     peers->peers[peers->npeers].addr = *ca;
2426     peers->peers[peers->npeers].last = *tv;
2427     peers->npeers++;
2428     return 1;
2429 }
2430
2431 static void transport_record_peers(struct site *st, transport_peers *peers,
2432                                    const struct comm_addr *addrs, int naddrs,
2433                                    const char *m) {
2434     /* We add addrs into peers.  The new entries end up at the front
2435      * and displace entries towards the end (perhaps even off the
2436      * end).  Any existing matching entries are moved up to the front.
2437      *
2438      * Caller must first call transport_peers_expire. */
2439
2440     if (naddrs==1) {
2441         /* avoids debug for uninteresting updates */
2442         int i;
2443         for (i=0; i<peers->npeers; i++) {
2444             if (comm_addr_equal(&addrs[0], &peers->peers[i].addr)) {
2445                 memmove(peers->peers+1, peers->peers,
2446                         sizeof(peers->peers[0]) * i);
2447                 peers->peers[0].addr = addrs[0];
2448                 peers->peers[0].last = *tv_now;
2449                 return;
2450             }
2451         }
2452     }
2453
2454     int old_npeers=peers->npeers;
2455     transport_peer old_peers[old_npeers];
2456     COPY_ARRAY(old_peers,peers->peers,old_npeers);
2457
2458     peers->npeers=0;
2459     int i;
2460     for (i=0; i<naddrs; i++) {
2461         if (!transport_peer_record_one(st,peers, &addrs[i], tv_now))
2462             break;
2463     }
2464     for (i=0; i<old_npeers; i++) {
2465         const transport_peer *old=&old_peers[i];
2466         if (!transport_peer_record_one(st,peers, &old->addr, &old->last))
2467             break;
2468     }
2469
2470     transport_peers_debug(st,peers,m, naddrs,addrs,0);
2471 }
2472
2473 static void transport_expire_record_peers(struct site *st,
2474                                           transport_peers *peers,
2475                                           const struct comm_addr *addrs,
2476                                           int naddrs, const char *m) {
2477     /* Convenience function */
2478     transport_peers_expire(st,peers);
2479     transport_record_peers(st,peers,addrs,naddrs,m);
2480 }
2481
2482 static bool_t transport_compute_setupinit_peers(struct site *st,
2483         const struct comm_addr *configured_addrs /* 0 if none or not found */,
2484         int n_configured_addrs /* 0 if none or not found */,
2485         const struct comm_addr *incoming_packet_addr /* 0 if none */) {
2486     if (!n_configured_addrs && !incoming_packet_addr &&
2487         !transport_peers_valid(&st->peers))
2488         return False;
2489
2490     slog(st,LOG_SETUP_INIT,
2491          "using: %d configured addr(s);%s %d old peer addrs(es)",
2492          n_configured_addrs,
2493          incoming_packet_addr ? " incoming packet address;" : "",
2494          st->peers.npeers);
2495
2496     /* Non-mobile peers try addresses until one is plausible.  The
2497      * effect is that this code always tries first the configured
2498      * address if supplied, or otherwise the address of the incoming
2499      * PROD, or finally the existing data peer if one exists; this is
2500      * as desired. */
2501
2502     transport_peers_copy(st,&st->setup_peers,&st->peers);
2503     transport_peers_expire(st,&st->setup_peers);
2504
2505     if (incoming_packet_addr)
2506         transport_record_peers(st,&st->setup_peers,
2507                                incoming_packet_addr,1, "incoming");
2508
2509     if (n_configured_addrs)
2510         transport_record_peers(st,&st->setup_peers,
2511                               configured_addrs,n_configured_addrs, "setupinit");
2512
2513     assert(transport_peers_valid(&st->setup_peers));
2514     return True;
2515 }
2516
2517 static void transport_setup_msgok(struct site *st, const struct comm_addr *a) {
2518     if (st->peer_mobile)
2519         transport_expire_record_peers(st,&st->setup_peers,a,1,"setupmsg");
2520 }
2521 static void transport_data_msgok(struct site *st, const struct comm_addr *a) {
2522     if (st->peer_mobile)
2523         transport_expire_record_peers(st,&st->peers,a,1,"datamsg");
2524 }
2525
2526 static int transport_peers_valid(transport_peers *peers) {
2527     return peers->npeers;
2528 }
2529 static void transport_peers_clear(struct site *st, transport_peers *peers) {
2530     peers->npeers= 0;
2531     transport_peers_debug(st,peers,"clear",0,0,0);
2532 }
2533 static void transport_peers_copy(struct site *st, transport_peers *dst,
2534                                  const transport_peers *src) {
2535     dst->npeers=src->npeers;
2536     COPY_ARRAY(dst->peers, src->peers, dst->npeers);
2537     transport_peers_debug(st,dst,"copy",
2538                           src->npeers, &src->peers->addr, sizeof(*src->peers));
2539 }
2540
2541 static void transport_resolve_complete(struct site *st,
2542                                        const struct comm_addr *addrs,
2543                                        int naddrs) {
2544     transport_expire_record_peers(st,&st->peers,addrs,naddrs,
2545                                   "resolved data");
2546     transport_expire_record_peers(st,&st->setup_peers,addrs,naddrs,
2547                                   "resolved setup");
2548 }
2549
2550 static void transport_resolve_complete_tardy(struct site *st,
2551                                              const struct comm_addr *addrs,
2552                                              int naddrs) {
2553     transport_expire_record_peers(st,&st->peers,addrs,naddrs,
2554                                   "resolved tardily");
2555 }
2556
2557 static void transport_peers__copy_by_mask(transport_peer *out, int *nout_io,
2558                                           unsigned mask,
2559                                           const transport_peers *inp) {
2560     /* out and in->peers may be the same region, or nonoverlapping */
2561     const transport_peer *in=inp->peers;
2562     int slot;
2563     for (slot=0; slot<inp->npeers; slot++) {
2564         if (!(mask & (1U << slot)))
2565             continue;
2566         if (!(out==in && slot==*nout_io))
2567             COPY_OBJ(out[*nout_io], in[slot]);
2568         (*nout_io)++;
2569     }
2570 }
2571
2572 void transport_xmit(struct site *st, transport_peers *peers,
2573                     struct buffer_if *buf, bool_t candebug) {
2574     int slot;
2575     transport_peers_expire(st, peers);
2576     unsigned failed=0; /* bitmask */
2577     assert(MAX_PEER_ADDRS < sizeof(unsigned)*CHAR_BIT);
2578
2579     int nfailed=0;
2580     for (slot=0; slot<peers->npeers; slot++) {
2581         transport_peer *peer=&peers->peers[slot];
2582         bool_t ok = comm_addr_sendmsg(st, &peer->addr, buf);
2583         if (candebug)
2584             dump_packet(st, buf, &peer->addr, False, ok);
2585         if (!ok) {
2586             failed |= 1U << slot;
2587             nfailed++;
2588         }
2589         if (ok && !st->peer_mobile)
2590             break;
2591     }
2592     /* Now we need to demote/delete failing addrs: if we are mobile we
2593      * merely demote them; otherwise we delete them. */
2594     if (st->local_mobile) {
2595         unsigned expected = ((1U << nfailed)-1) << (peers->npeers-nfailed);
2596         /* `expected' has all the failures at the end already */
2597         if (failed != expected) {
2598             int fslot=0;
2599             transport_peer failedpeers[nfailed];
2600             transport_peers__copy_by_mask(failedpeers, &fslot, failed,peers);
2601             assert(fslot == nfailed);
2602             int wslot=0;
2603             transport_peers__copy_by_mask(peers->peers,&wslot,~failed,peers);
2604             assert(wslot+nfailed == peers->npeers);
2605             COPY_ARRAY(peers->peers+wslot, failedpeers, nfailed);
2606             transport_peers_debug(st,peers,"mobile failure reorder",0,0,0);
2607         }
2608     } else {
2609         if (failed && peers->npeers > 1) {
2610             int wslot=0;
2611             transport_peers__copy_by_mask(peers->peers,&wslot,~failed,peers);
2612             peers->npeers=wslot;
2613             transport_peers_debug(st,peers,"non-mobile failure cleanup",0,0,0);
2614         }
2615     }
2616 }
2617
2618 /***** END of transport peers definitions *****/