chiark / gitweb /
site: transport peers: Break out transport_resolve_complete,_tardy
[secnet.git] / site.c
diff --git a/site.c b/site.c
index f87328f..afa85e8 100644 (file)
--- a/site.c
+++ b/site.c
@@ -208,6 +208,10 @@ static void transport_data_msgok(struct site *st, const struct comm_addr *a);
 static bool_t transport_compute_setupinit_peers(struct site *st,
         const struct comm_addr *configured_addr /* 0 if none or not found */,
         const struct comm_addr *prod_hint_addr /* 0 if none */);
+static void transport_resolve_complete(struct site *st,
+                                      const struct comm_addr *a);
+static void transport_resolve_complete_tardy(struct site *st,
+                                            const struct comm_addr *ca_use);
 static void transport_record_peer(struct site *st, transport_peers *peers,
                                  const struct comm_addr *addr, const char *m);
 
@@ -229,7 +233,7 @@ struct site {
 /* configuration information */
     string_t localname;
     string_t remotename;
-    bool_t peer_mobile; /* Mobile client support */
+    bool_t local_mobile, peer_mobile; /* Mobile client support */
     int32_t transport_peers_max;
     string_t tunname; /* localname<->remotename by default, used in logs */
     string_t address; /* DNS name for bootstrapping, optional */
@@ -267,6 +271,7 @@ struct site {
     uint32_t state;
     uint64_t now; /* Most recently seen time */
     bool_t allow_send_prod;
+    bool_t resolving;
 
     /* The currently established session */
     struct data_key current;
@@ -299,35 +304,75 @@ struct site {
     struct transform_inst_if *new_transform; /* For key setup/verify */
 };
 
+static uint32_t event_log_priority(struct site *st, uint32_t event)
+{
+    if (!(event&st->log_events))
+       return 0;
+    switch(event) {
+    case LOG_UNEXPECTED:    return M_INFO;
+    case LOG_SETUP_INIT:    return M_INFO;
+    case LOG_SETUP_TIMEOUT: return M_NOTICE;
+    case LOG_ACTIVATE_KEY:  return M_INFO;
+    case LOG_TIMEOUT_KEY:   return M_INFO;
+    case LOG_SEC:           return M_SECURITY;
+    case LOG_STATE:         return M_DEBUG;
+    case LOG_DROP:          return M_DEBUG;
+    case LOG_DUMP:          return M_DEBUG;
+    case LOG_ERROR:         return M_ERR;
+    case LOG_PEER_ADDRS:    return M_DEBUG;
+    default:                return M_ERR;
+    }
+}
+
+static void vslog(struct site *st, uint32_t event, cstring_t msg, va_list ap)
+FORMAT(printf,3,0);
+static void vslog(struct site *st, uint32_t event, cstring_t msg, va_list ap)
+{
+    uint32_t class;
+
+    class=event_log_priority(st, event);
+    if (class) {
+       slilog_part(st->log,class,"%s: ",st->tunname);
+       vslilog_part(st->log,class,msg,ap);
+       slilog_part(st->log,class,"\n");
+    }
+}
+
 static void slog(struct site *st, uint32_t event, cstring_t msg, ...)
 FORMAT(printf,3,4);
 static void slog(struct site *st, uint32_t event, cstring_t msg, ...)
 {
     va_list ap;
-    char buf[240];
-    uint32_t class;
-
     va_start(ap,msg);
+    vslog(st,event,msg,ap);
+    va_end(ap);
+}
 
-    if (event&st->log_events) {
-       switch(event) {
-       case LOG_UNEXPECTED: class=M_INFO; break;
-       case LOG_SETUP_INIT: class=M_INFO; break;
-       case LOG_SETUP_TIMEOUT: class=M_NOTICE; break;
-       case LOG_ACTIVATE_KEY: class=M_INFO; break;
-       case LOG_TIMEOUT_KEY: class=M_INFO; break;
-       case LOG_SEC: class=M_SECURITY; break;
-       case LOG_STATE: class=M_DEBUG; break;
-       case LOG_DROP: class=M_DEBUG; break;
-       case LOG_DUMP: class=M_DEBUG; break;
-       case LOG_ERROR: class=M_ERR; break;
-       case LOG_PEER_ADDRS: class=M_DEBUG; break;
-       default: class=M_ERR; break;
-       }
+static void logtimeout(struct site *st, const char *fmt, ...)
+FORMAT(printf,2,3);
+static void logtimeout(struct site *st, const char *fmt, ...)
+{
+    uint32_t class=event_log_priority(st,LOG_SETUP_TIMEOUT);
+    if (!class)
+       return;
+
+    va_list ap;
+    va_start(ap,fmt);
+
+    slilog_part(st->log,class,"%s: ",st->tunname);
+    vslilog_part(st->log,class,fmt,ap);
 
-       vsnprintf(buf,sizeof(buf),msg,ap);
-       slilog(st->log,class,"%s: %s",st->tunname,buf);
+    const char *delim;
+    int i;
+    for (i=0, delim=" (tried ";
+        i<st->setup_peers.npeers;
+        i++, delim=", ") {
+       transport_peer *peer=&st->setup_peers.peers[i];
+       const char *s=comm_addr_to_string(&peer->addr);
+       slilog_part(st->log,class,"%s%s",delim,s);
     }
+
+    slilog_part(st->log,class,")\n");
     va_end(ap);
 }
 
@@ -658,7 +703,6 @@ static bool_t process_msg1(struct site *st, struct buffer_if *msg1,
        process an incoming MSG1, and that the MSG1 has correct values
        of A and B. */
 
-    transport_record_peer(st,&st->setup_peers,src,"msg1");
     st->setup_session_id=m->source;
     st->remote_capabilities=m->remote_capabilities;
     memcpy(st->remoteN,m->nR,NONCELEN);
@@ -1077,7 +1121,7 @@ static bool_t send_msg(struct site *st)
        st->retries--;
        return True;
     } else if (st->state==SITE_SENTMSG5) {
-       slog(st,LOG_SETUP_TIMEOUT,"timed out sending MSG5, stashing new key");
+       logtimeout(st,"timed out sending MSG5, stashing new key");
        /* We stash the key we have produced, in case it turns out that
         * our peer did see our MSG5 after all and starts using it. */
        /* This is a bit like some of activate_new_key */
@@ -1095,7 +1139,7 @@ static bool_t send_msg(struct site *st)
        enter_state_wait(st);
        return False;
     } else {
-       slog(st,LOG_SETUP_TIMEOUT,"timed out sending key setup packet "
+       logtimeout(st,"timed out sending key setup packet "
            "(in state %s)",state_name(st->state));
        enter_state_wait(st);
        return False;
@@ -1107,10 +1151,8 @@ static void site_resolve_callback(void *sst, struct in_addr *address)
     struct site *st=sst;
     struct comm_addr ca_buf, *ca_use;
 
-    if (st->state!=SITE_RESOLVE) {
-       slog(st,LOG_UNEXPECTED,"site_resolve_callback called unexpectedly");
-       return;
-    }
+    st->resolving=False;
+
     if (address) {
        FILLZERO(ca_buf);
        ca_buf.comm=st->comms[0];
@@ -1118,22 +1160,72 @@ static void site_resolve_callback(void *sst, struct in_addr *address)
        ca_buf.sin.sin_port=htons(st->remoteport);
        ca_buf.sin.sin_addr=*address;
        ca_use=&ca_buf;
+       slog(st,LOG_STATE,"resolution of %s completed: %s",
+            st->address, comm_addr_to_string(ca_use));;
     } else {
        slog(st,LOG_ERROR,"resolution of %s failed",st->address);
        ca_use=0;
     }
-    if (transport_compute_setupinit_peers(st,ca_use,0)) {
-       enter_new_state(st,SITE_SENTMSG1);
-    } else {
-       /* Can't figure out who to try to to talk to */
-       slog(st,LOG_SETUP_INIT,"key exchange failed: cannot find peer address");
-       enter_state_run(st);
+
+    switch (st->state) {
+    case SITE_RESOLVE:
+        if (transport_compute_setupinit_peers(st,ca_use,0)) {
+           enter_new_state(st,SITE_SENTMSG1);
+       } else {
+           /* Can't figure out who to try to to talk to */
+           slog(st,LOG_SETUP_INIT,
+                "key exchange failed: cannot find peer address");
+           enter_state_run(st);
+       }
+       break;
+    case SITE_SENTMSG1: case SITE_SENTMSG2:
+    case SITE_SENTMSG3: case SITE_SENTMSG4:
+    case SITE_SENTMSG5:
+       if (ca_use) {
+           /* We start using the address immediately for data too.
+            * It's best to store it in st->peers now because we might
+            * go via SENTMSG5, WAIT, and a MSG0, straight into using
+            * the new key (without updating the data peer addrs). */
+           transport_resolve_complete(st,ca_use);
+       } else if (st->local_mobile) {
+           /* We can't let this rest because we may have a peer
+            * address which will break in the future. */
+           slog(st,LOG_SETUP_INIT,"resolution of %s failed: "
+                "abandoning key exchange",st->address);
+           enter_state_wait(st);
+       } else {
+           slog(st,LOG_SETUP_INIT,"resolution of %s failed: "
+                " continuing to use source address of peer's packets"
+                " for key exchange and ultimately data",
+                st->address);
+       }
+       break;
+    case SITE_RUN:
+       if (ca_use) {
+           slog(st,LOG_SETUP_INIT,"resolution of %s completed tardily,"
+                " updating peer address(es)",st->address);
+           transport_resolve_complete_tardy(st,ca_use);
+       } else if (st->local_mobile) {
+           /* Not very good.  We should queue (another) renegotiation
+            * so that we can update the peer address. */
+           st->key_renegotiate_time=st->now+st->wait_timeout;
+       } else {
+           slog(st,LOG_SETUP_INIT,"resolution of %s failed: "
+                " continuing to use source address of peer's packets",
+                st->address);
+       }
+       break;
+    case SITE_WAIT:
+    case SITE_STOP:
+       /* oh well */
+       break;
     }
 }
 
 static bool_t initiate_key_setup(struct site *st, cstring_t reason,
                                 const struct comm_addr *prod_hint)
 {
+    /* Reentrancy hazard: can call enter_new_state/enter_state_* */
     if (st->state!=SITE_RUN) return False;
     slog(st,LOG_SETUP_INIT,"initiating key exchange (%s)",reason);
     if (st->address) {
@@ -1244,14 +1336,35 @@ static void enter_state_run(struct site *st)
     set_link_quality(st);
 }
 
+static bool_t ensure_resolving(struct site *st)
+{
+    /* Reentrancy hazard: may call site_resolve_callback and hence
+     * enter_new_state, enter_state_* and generate_msg*. */
+    if (st->resolving)
+        return True;
+
+    assert(st->address);
+
+    /* resolver->request might reentrantly call site_resolve_callback
+     * which will clear st->resolving, so we need to set it beforehand
+     * rather than afterwards; also, it might return False, in which
+     * case we have to clear ->resolving again. */
+    st->resolving=True;
+    bool_t ok = st->resolver->request(st->resolver->st,st->address,
+                                     site_resolve_callback,st);
+    if (!ok)
+       st->resolving=False;
+
+    return ok;
+}
+
 static bool_t enter_state_resolve(struct site *st)
 {
+    /* Reentrancy hazard!  See ensure_resolving. */
     state_assert(st,st->state==SITE_RUN);
     slog(st,LOG_STATE,"entering state RESOLVE");
     st->state=SITE_RESOLVE;
-    st->resolver->request(st->resolver->st,st->address,
-                         site_resolve_callback,st);
-    return True;
+    return ensure_resolving(st);
 }
 
 static bool_t enter_new_state(struct site *st, uint32_t next)
@@ -1517,9 +1630,14 @@ static bool_t site_incoming(void *sst, struct buffer_if *buf,
        if (st->state==SITE_RUN || st->state==SITE_RESOLVE ||
            st->state==SITE_WAIT) {
            /* We should definitely process it */
+           transport_record_peer(st,&st->setup_peers,source,"msg1");
            if (process_msg1(st,buf,source,&named_msg)) {
                slog(st,LOG_SETUP_INIT,"key setup initiated by peer");
-               enter_new_state(st,SITE_SENTMSG2);
+               bool_t entered=enter_new_state(st,SITE_SENTMSG2);
+               if (entered && st->address && st->local_mobile)
+                   /* We must do this as the very last thing, because
+                      the resolver callback might reenter us. */
+                   ensure_resolving(st);
            } else {
                slog(st,LOG_ERROR,"failed to process incoming msg1");
            }
@@ -1539,6 +1657,7 @@ static bool_t site_incoming(void *sst, struct buffer_if *buf,
                     "priority => use incoming msg1");
                if (process_msg1(st,buf,source,&named_msg)) {
                    BUF_FREE(&st->buffer); /* Free our old message 1 */
+                   transport_setup_msgok(st,source);
                    enter_new_state(st,SITE_SENTMSG2);
                } else {
                    slog(st,LOG_ERROR,"failed to process an incoming "
@@ -1722,7 +1841,7 @@ static list_t *site_apply(closure_t *self, struct cloc loc, dict_t *context,
     st->remotename=dict_read_string(dict, "name", True, "site", loc);
 
     st->peer_mobile=dict_read_bool(dict,"mobile",False,"site",loc,False);
-    bool_t local_mobile=
+    st->local_mobile=
        dict_read_bool(dict,"local-mobile",False,"site",loc,False);
 
     /* Sanity check (which also allows the 'sites' file to include
@@ -1731,14 +1850,14 @@ static list_t *site_apply(closure_t *self, struct cloc loc, dict_t *context,
     if (strcmp(st->localname,st->remotename)==0) {
        Message(M_DEBUG,"site %s: local-name==name -> ignoring this site\n",
                st->localname);
-       if (st->peer_mobile != local_mobile)
+       if (st->peer_mobile != st->local_mobile)
            cfgfatal(loc,"site","site %s's peer-mobile=%d"
                    " but our local-mobile=%d\n",
-                   st->localname, st->peer_mobile, local_mobile);
+                   st->localname, st->peer_mobile, st->local_mobile);
        free(st);
        return NULL;
     }
-    if (st->peer_mobile && local_mobile) {
+    if (st->peer_mobile && st->local_mobile) {
        Message(M_WARNING,"site %s: site is mobile but so are we"
                " -> ignoring this site\n", st->remotename);
        free(st);
@@ -1786,7 +1905,7 @@ static list_t *site_apply(closure_t *self, struct cloc loc, dict_t *context,
     st->dh=find_cl_if(dict,"dh",CL_DH,True,"site",loc);
     st->hash=find_cl_if(dict,"hash",CL_HASH,True,"site",loc);
 
-#define DEFAULT(D) (st->peer_mobile || local_mobile    \
+#define DEFAULT(D) (st->peer_mobile || st->local_mobile        \
                     ? DEFAULT_MOBILE_##D : DEFAULT_##D)
 #define CFG_NUMBER(k,D) dict_read_number(dict,(k),False,"site",loc,DEFAULT(D));
 
@@ -1821,6 +1940,7 @@ static list_t *site_apply(closure_t *self, struct cloc loc, dict_t *context,
     st->log_events=string_list_to_word(dict_lookup(dict,"log-events"),
                                       log_event_table,"site");
 
+    st->resolving=False;
     st->allow_send_prod=0;
 
     st->tunname=safe_malloc(strlen(st->localname)+strlen(st->remotename)+5,
@@ -1911,14 +2031,14 @@ static void transport_peers_debug(struct site *st, transport_peers *dst,
         i++, (argp+=stride?stride:sizeof(*args))) {
        const struct comm_addr *ca=(void*)argp;
        slog(st, LOG_PEER_ADDRS, " args: addrs[%d]=%s",
-            i, ca->comm->addr_to_string(ca->comm->st,ca));
+            i, comm_addr_to_string(ca));
     }
     for (i=0; i<dst->npeers; i++) {
        struct timeval diff;
        timersub(tv_now,&dst->peers[i].last,&diff);
        const struct comm_addr *ca=&dst->peers[i].addr;
        slog(st, LOG_PEER_ADDRS, " peers: addrs[%d]=%s T-%ld.%06ld",
-            i, ca->comm->addr_to_string(ca->comm->st,ca),
+            i, comm_addr_to_string(ca),
             (unsigned long)diff.tv_sec, (unsigned long)diff.tv_usec);
     }
 }
@@ -1955,7 +2075,7 @@ static void transport_record_peer(struct site *st, transport_peers *peers,
 
     changed=1;
     if (peers->npeers==st->transport_peers_max)
-       slot=st->transport_peers_max;
+       slot=st->transport_peers_max-1;
     else
        slot=peers->npeers++;
 
@@ -1986,7 +2106,7 @@ static bool_t transport_compute_setupinit_peers(struct site *st,
         prod_hint_addr ? " PROD hint address;" : "",
         st->peers.npeers);
 
-    /* Non-mobile peers havve st->peers.npeers==0 or ==1, since they
+    /* Non-mobile peers have st->peers.npeers==0 or ==1, since they
      * have transport_peers_max==1.  The effect is that this code
      * always uses the configured address if supplied, or otherwise
      * the address of the incoming PROD, or the existing data peer if
@@ -2028,6 +2148,17 @@ static void transport_peers_copy(struct site *st, transport_peers *dst,
                          src->npeers, &src->peers->addr, sizeof(*src->peers));
 }
 
+static void transport_resolve_complete(struct site *st,
+                                      const struct comm_addr *ca_use) {
+    transport_record_peer(st,&st->peers,ca_use,"resolved data");
+    transport_record_peer(st,&st->setup_peers,ca_use,"resolved setup");
+}
+
+static void transport_resolve_complete_tardy(struct site *st,
+                                            const struct comm_addr *ca_use) {
+    transport_record_peer(st,&st->peers,ca_use,"resolved tardily");
+}
+
 void transport_xmit(struct site *st, transport_peers *peers,
                    struct buffer_if *buf, bool_t candebug) {
     int slot;