chiark / gitweb /
site: Do name resolution on peer-initiated key setup too
authorIan Jackson <ijackson@chiark.greenend.org.uk>
Tue, 13 May 2014 23:40:44 +0000 (00:40 +0100)
committerIan Jackson <ijackson@chiark.greenend.org.uk>
Sun, 18 May 2014 13:53:13 +0000 (14:53 +0100)
The current arrangement locks in the peer address used for key
exchange, for the lifetime of the key.

Most configurations do not have the secnet bind to a particular
address.  And secnet takes no particular care about the source address
on its packets.  The result is that secnet might reply from a
different address.  (Also NAT might cause this effect.)

But (unless the peer is mobile) it is not ideal to use an address
other than the configured address.  In particular, if we are mobile
then the network environment, and our routing to the peer, might
change so that the previous source address is not valid.  This could
result in an extended failure (of up to the key expiry lifetime).

Arguably this is a configuration error, but there is another reason to
dislike the current behaviour: it has the rather odd property that if
an opponent (or incompetent middlebox) reroutes/NATs the packets
during key exchange, the entire dataflow (at least in one direction)
might end up sent via a bizarre route (or, if the environment changes,
not delivered).

We still want to use the peer's address as a hint though: otherwise we
would have to stall a peer-initiated key setup while resolution takes
place.

So:

 * Initiate a peer address lookup when we get an incoming MSG1, if we
   have a configured name or address.  We do this in parallel with the
   key exchange.  (As a result it is possible that a peer address
   lookup might complete well after the key exchange has finished.)

 * Except, when the incoming MSG1 has crossed with ours, we must
   already have done the the lookup so do not do it again.

 * And, in this latter case do not unconditionally record the incoming
   peer address; instead, treat it as a "msgok"; otherwise we might
   unjustifiably overwrite an address we got from the configuration
   with the incoming address from the packet.

 * The two points above mean moving the transport_record_peer from
   process_msg1 into its two call sites, since the logic now needs to
   differ between them.

 * Handle the results of the MSG1-prompted peer address lookup.  In
   SITE_RESOLVE we do as we did before.  In most of the other states,
   we record the address and use it for future communications.

 * If the resolution fails with a non-mobile site, keep using the
   apparently-working peer address(es).

 * With a mobile site the currently working peer address might stop
   working so this is not acceptable.  In that case, make arrangements
   that a failed peer address lookup will be retried quickly - but in
   the meantime, there is no need to halt the packet flow.

 * If the attempt to submit the resolver query fails, just use the
   apparent peer address as before.

Signed-off-by: Ian Jackson <ijackson@chiark.greenend.org.uk>
---
Changes in v2:
  * Run the resolution after entering the new state, not before.
    Otherwise if we get the callback reentrantly, we get a bit
    confused.

site.c

diff --git a/site.c b/site.c
index 0e2c43a..379ab0d 100644 (file)
--- a/site.c
+++ b/site.c
@@ -699,7 +699,6 @@ static bool_t process_msg1(struct site *st, struct buffer_if *msg1,
        process an incoming MSG1, and that the MSG1 has correct values
        of A and B. */
 
        process an incoming MSG1, and that the MSG1 has correct values
        of A and B. */
 
-    transport_record_peer(st,&st->setup_peers,src,"msg1");
     st->setup_session_id=m->source;
     st->remote_capabilities=m->remote_capabilities;
     memcpy(st->remoteN,m->nR,NONCELEN);
     st->setup_session_id=m->source;
     st->remote_capabilities=m->remote_capabilities;
     memcpy(st->remoteN,m->nR,NONCELEN);
@@ -1150,10 +1149,6 @@ static void site_resolve_callback(void *sst, struct in_addr *address)
 
     st->resolving=False;
 
 
     st->resolving=False;
 
-    if (st->state!=SITE_RESOLVE) {
-       slog(st,LOG_UNEXPECTED,"site_resolve_callback called unexpectedly");
-       return;
-    }
     if (address) {
        FILLZERO(ca_buf);
        ca_buf.comm=st->comms[0];
     if (address) {
        FILLZERO(ca_buf);
        ca_buf.comm=st->comms[0];
@@ -1167,12 +1162,60 @@ static void site_resolve_callback(void *sst, struct in_addr *address)
        slog(st,LOG_ERROR,"resolution of %s failed",st->address);
        ca_use=0;
     }
        slog(st,LOG_ERROR,"resolution of %s failed",st->address);
        ca_use=0;
     }
-    if (transport_compute_setupinit_peers(st,ca_use,0)) {
-       enter_new_state(st,SITE_SENTMSG1);
-    } else {
-       /* Can't figure out who to try to to talk to */
-       slog(st,LOG_SETUP_INIT,"key exchange failed: cannot find peer address");
-       enter_state_run(st);
+
+    switch (st->state) {
+    case SITE_RESOLVE:
+        if (transport_compute_setupinit_peers(st,ca_use,0)) {
+           enter_new_state(st,SITE_SENTMSG1);
+       } else {
+           /* Can't figure out who to try to to talk to */
+           slog(st,LOG_SETUP_INIT,
+                "key exchange failed: cannot find peer address");
+           enter_state_run(st);
+       }
+       break;
+    case SITE_SENTMSG1: case SITE_SENTMSG2:
+    case SITE_SENTMSG3: case SITE_SENTMSG4:
+    case SITE_SENTMSG5:
+       if (ca_use) {
+           /* We start using the address immediately for data too.
+            * It's best to store it in st->peers now because we might
+            * go via SENTMSG5, WAIT, and a MSG0, straight into using
+            * the new key (without updating the data peer addrs). */
+           transport_record_peer(st,&st->peers,ca_use,"resolved data");
+           transport_record_peer(st,&st->setup_peers,ca_use,"resolved setup");
+       } else if (st->local_mobile) {
+           /* We can't let this rest because we may have a peer
+            * address which will break in the future. */
+           slog(st,LOG_SETUP_INIT,"resolution of %s failed: "
+                "abandoning key exchange",st->address);
+           enter_state_wait(st);
+       } else {
+           slog(st,LOG_SETUP_INIT,"resolution of %s failed: "
+                " continuing to use source address of peer's packets"
+                " for key exchange and ultimately data",
+                st->address);
+       }
+       break;
+    case SITE_RUN:
+       if (ca_use) {
+           slog(st,LOG_SETUP_INIT,"resolution of %s completed tardily,"
+                " updating peer address(es)",st->address);
+           transport_record_peer(st,&st->peers,ca_use,"resolved tardily");
+       } else if (st->local_mobile) {
+           /* Not very good.  We should queue (another) renegotiation
+            * so that we can update the peer address. */
+           st->key_renegotiate_time=st->now+st->wait_timeout;
+       } else {
+           slog(st,LOG_SETUP_INIT,"resolution of %s failed: "
+                " continuing to use source address of peer's packets",
+                st->address);
+       }
+       break;
+    case SITE_WAIT:
+    case SITE_STOP:
+       /* oh well */
+       break;
     }
 }
 
     }
 }
 
@@ -1584,9 +1627,14 @@ static bool_t site_incoming(void *sst, struct buffer_if *buf,
        if (st->state==SITE_RUN || st->state==SITE_RESOLVE ||
            st->state==SITE_WAIT) {
            /* We should definitely process it */
        if (st->state==SITE_RUN || st->state==SITE_RESOLVE ||
            st->state==SITE_WAIT) {
            /* We should definitely process it */
+           transport_record_peer(st,&st->setup_peers,source,"msg1");
            if (process_msg1(st,buf,source,&named_msg)) {
                slog(st,LOG_SETUP_INIT,"key setup initiated by peer");
            if (process_msg1(st,buf,source,&named_msg)) {
                slog(st,LOG_SETUP_INIT,"key setup initiated by peer");
-               enter_new_state(st,SITE_SENTMSG2);
+               bool_t entered=enter_new_state(st,SITE_SENTMSG2);
+               if (entered && st->address)
+                   /* We must do this as the very last thing, because
+                      the resolver callback might reenter us. */
+                   ensure_resolving(st);
            } else {
                slog(st,LOG_ERROR,"failed to process incoming msg1");
            }
            } else {
                slog(st,LOG_ERROR,"failed to process incoming msg1");
            }
@@ -1606,6 +1654,7 @@ static bool_t site_incoming(void *sst, struct buffer_if *buf,
                     "priority => use incoming msg1");
                if (process_msg1(st,buf,source,&named_msg)) {
                    BUF_FREE(&st->buffer); /* Free our old message 1 */
                     "priority => use incoming msg1");
                if (process_msg1(st,buf,source,&named_msg)) {
                    BUF_FREE(&st->buffer); /* Free our old message 1 */
+                   transport_setup_msgok(st,source);
                    enter_new_state(st,SITE_SENTMSG2);
                } else {
                    slog(st,LOG_ERROR,"failed to process an incoming "
                    enter_new_state(st,SITE_SENTMSG2);
                } else {
                    slog(st,LOG_ERROR,"failed to process an incoming "