chiark / gitweb /
changelog: start 0.4.2~
[secnet.git] / site.c
diff --git a/site.c b/site.c
index 15dace88d3c74f0e22a35b891d91a061c311877e..dcac0baaf3428f20c8a97b83eb4d169954271119 100644 (file)
--- a/site.c
+++ b/site.c
@@ -1,5 +1,24 @@
 /* site.c - manage communication with a remote network site */
 
+/*
+ * This file is part of secnet.
+ * See README for full list of copyright holders.
+ *
+ * secnet is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version d of the License, or
+ * (at your option) any later version.
+ * 
+ * secnet is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * version 3 along with secnet; if not, see
+ * https://www.gnu.org/licenses/gpl.html.
+ */
+
 /* The 'site' code doesn't know anything about the structure of the
    packets it's transmitting.  In fact, under the new netlink
    configuration scheme it doesn't need to know anything at all about
@@ -277,14 +296,16 @@ struct site {
 /* configuration information */
     string_t localname;
     string_t remotename;
+    bool_t keepalive;
     bool_t local_mobile, peer_mobile; /* Mobile client support */
     int32_t transport_peers_max;
     string_t tunname; /* localname<->remotename by default, used in logs */
-    string_t address; /* DNS name for bootstrapping, optional */
+    cstring_t *addresses; /* DNS name or address(es) for bootstrapping, optional */
     int remoteport; /* Port for bootstrapping, optional */
     uint32_t mtu_target;
     struct netlink_if *netlink;
     struct comm_if **comms;
+    struct comm_clientinfo **commclientinfos;
     int ncomms;
     struct resolver_if *resolver;
     struct log_if *log;
@@ -315,7 +336,10 @@ struct site {
     uint32_t state;
     uint64_t now; /* Most recently seen time */
     bool_t allow_send_prod;
-    bool_t resolving;
+    int resolving_count;
+    int resolving_n_results_all;
+    int resolving_n_results_stored;
+    struct comm_addr resolving_results[MAX_PEER_ADDRS];
 
     /* The currently established session */
     struct data_key current;
@@ -430,6 +454,7 @@ static bool_t initiate_key_setup(struct site *st, cstring_t reason,
                                 const struct comm_addr *prod_hint);
 static void enter_state_run(struct site *st);
 static bool_t enter_state_resolve(struct site *st);
+static void decrement_resolving_count(struct site *st, int by);
 static bool_t enter_new_state(struct site *st,uint32_t next);
 static void enter_state_wait(struct site *st);
 static void activate_new_key(struct site *st);
@@ -516,9 +541,10 @@ static void set_new_transform(struct site *st, char *pk)
     assert(st->sharedsecretlen);
     if (st->sharedsecretlen > st->sharedsecretallocd) {
        st->sharedsecretallocd=st->sharedsecretlen;
-       st->sharedsecret=realloc(st->sharedsecret,st->sharedsecretallocd);
+       st->sharedsecret=safe_realloc_ary(st->sharedsecret,1,
+                                         st->sharedsecretallocd,
+                                         "site:sharedsecret");
     }
-    if (!st->sharedsecret) fatal_perror("site:sharedsecret");
 
     /* Generate the shared key */
     st->dh->makeshared(st->dh->st,st->dhsecret,st->dh->len,pk,
@@ -1122,6 +1148,10 @@ static bool_t process_msg0(struct site *st, struct buffer_if *msg0,
     case LABEL_MSG7:
        /* We must forget about the current session. */
        delete_keys(st,"request from peer",LOG_SEC);
+       /* probably, the peer is shutting down, and this is going to fail,
+        * but we need to be trying to bring the link up again */
+       if (st->keepalive)
+           initiate_key_setup(st,"peer requested key teardown",0);
        return True;
     case LABEL_MSG9:
        /* Deliver to netlink layer */
@@ -1140,16 +1170,34 @@ static bool_t process_msg0(struct site *st, struct buffer_if *msg0,
 }
 
 static void dump_packet(struct site *st, struct buffer_if *buf,
-                       const struct comm_addr *addr, bool_t incoming)
+                       const struct comm_addr *addr, bool_t incoming,
+                       bool_t ok)
 {
     uint32_t dest=get_uint32(buf->start);
     uint32_t source=get_uint32(buf->start+4);
     uint32_t msgtype=get_uint32(buf->start+8);
 
     if (st->log_events & LOG_DUMP)
-       slilog(st->log,M_DEBUG,"%s: %s: %08x<-%08x: %08x:",
+       slilog(st->log,M_DEBUG,"%s: %s: %08x<-%08x: %08x: %s%s",
               st->tunname,incoming?"incoming":"outgoing",
-              dest,source,msgtype);
+              dest,source,msgtype,comm_addr_to_string(addr),
+              ok?"":" - fail");
+}
+
+static bool_t comm_addr_sendmsg(struct site *st,
+                               const struct comm_addr *dest,
+                               struct buffer_if *buf)
+{
+    int i;
+    struct comm_clientinfo *commclientinfo = 0;
+
+    for (i=0; i < st->ncomms; i++) {
+       if (st->comms[i] == dest->comm) {
+           commclientinfo = st->commclientinfos[i];
+           break;
+       }
+    }
+    return dest->comm->sendmsg(dest->comm->st, buf, dest, commclientinfo);
 }
 
 static uint32_t site_status(void *st)
@@ -1191,23 +1239,51 @@ static bool_t send_msg(struct site *st)
 }
 
 static void site_resolve_callback(void *sst, const struct comm_addr *addrs,
-                                 int naddrs, int was_naddrs,
-                                 const char *failwhy)
+                                 int stored_naddrs, int all_naddrs,
+                                 const char *address, const char *failwhy)
 {
     struct site *st=sst;
 
-    st->resolving=False;
+    if (!stored_naddrs) {
+       slog(st,LOG_ERROR,"resolution of %s failed: %s",address,failwhy);
+    } else {
+       slog(st,LOG_PEER_ADDRS,"resolution of %s completed, %d addrs, eg: %s",
+            address, all_naddrs, comm_addr_to_string(&addrs[0]));;
+
+       int space=st->transport_peers_max-st->resolving_n_results_stored;
+       int n_tocopy=MIN(stored_naddrs,space);
+       COPY_ARRAY(st->resolving_results + st->resolving_n_results_stored,
+                  addrs,
+                  n_tocopy);
+       st->resolving_n_results_stored += n_tocopy;
+       st->resolving_n_results_all += all_naddrs;
+    }
+
+    decrement_resolving_count(st,1);
+}
+
+static void decrement_resolving_count(struct site *st, int by)
+{
+    assert(st->resolving_count>0);
+    st->resolving_count-=by;
+
+    if (st->resolving_count)
+       return;
+
+    /* OK, we are done with them all.  Handle combined results. */
+
+    const struct comm_addr *addrs=st->resolving_results;
+    int naddrs=st->resolving_n_results_stored;
+    assert(naddrs<=st->transport_peers_max);
 
     if (naddrs) {
-       slog(st,LOG_STATE,"resolution of %s completed, %d addrs, eg: %s",
-            st->address, was_naddrs, comm_addr_to_string(&addrs[0]));;
-       if (naddrs != was_naddrs) {
+       if (naddrs != st->resolving_n_results_all) {
            slog(st,LOG_SETUP_INIT,"resolution of supplied addresses/names"
                 " yielded too many results (%d > %d), some ignored",
-                was_naddrs, naddrs);
+                st->resolving_n_results_all, naddrs);
        }
-    } else {
-       slog(st,LOG_ERROR,"resolution of %s failed: %s",st->address,failwhy);
+       slog(st,LOG_STATE,"resolution completed, %d addrs, eg: %s",
+            naddrs, iaddr_to_string(&addrs[0].ia));;
     }
 
     switch (st->state) {
@@ -1233,29 +1309,27 @@ static void site_resolve_callback(void *sst, const struct comm_addr *addrs,
        } else if (st->local_mobile) {
            /* We can't let this rest because we may have a peer
             * address which will break in the future. */
-           slog(st,LOG_SETUP_INIT,"resolution of %s failed: "
-                "abandoning key exchange",st->address);
+           slog(st,LOG_SETUP_INIT,"resolution failed: "
+                "abandoning key exchange");
            enter_state_wait(st);
        } else {
-           slog(st,LOG_SETUP_INIT,"resolution of %s failed: "
+           slog(st,LOG_SETUP_INIT,"resolution failed: "
                 " continuing to use source address of peer's packets"
-                " for key exchange and ultimately data",
-                st->address);
+                " for key exchange and ultimately data");
        }
        break;
     case SITE_RUN:
        if (naddrs) {
-           slog(st,LOG_SETUP_INIT,"resolution of %s completed tardily,"
-                " updating peer address(es)",st->address);
+           slog(st,LOG_SETUP_INIT,"resolution completed tardily,"
+                " updating peer address(es)");
            transport_resolve_complete_tardy(st,addrs,naddrs);
        } else if (st->local_mobile) {
            /* Not very good.  We should queue (another) renegotiation
             * so that we can update the peer address. */
            st->key_renegotiate_time=st->now+st->wait_timeout;
        } else {
-           slog(st,LOG_SETUP_INIT,"resolution of %s failed: "
-                " continuing to use source address of peer's packets",
-                st->address);
+           slog(st,LOG_SETUP_INIT,"resolution failed: "
+                " continuing to use source address of peer's packets");
        }
        break;
     case SITE_WAIT:
@@ -1271,8 +1345,8 @@ static bool_t initiate_key_setup(struct site *st, cstring_t reason,
     /* Reentrancy hazard: can call enter_new_state/enter_state_* */
     if (st->state!=SITE_RUN) return False;
     slog(st,LOG_SETUP_INIT,"initiating key exchange (%s)",reason);
-    if (st->address) {
-       slog(st,LOG_SETUP_INIT,"resolving peer address");
+    if (st->addresses) {
+       slog(st,LOG_SETUP_INIT,"resolving peer address(es)");
        return enter_state_resolve(st);
     } else if (transport_compute_setupinit_peers(st,0,0,prod_hint)) {
        return enter_new_state(st,SITE_SENTMSG1);
@@ -1353,7 +1427,7 @@ static void set_link_quality(struct site *st)
        quality=LINK_QUALITY_UP;
     else if (st->state==SITE_WAIT || st->state==SITE_STOP)
        quality=LINK_QUALITY_DOWN;
-    else if (st->address)
+    else if (st->addresses)
        quality=LINK_QUALITY_DOWN_CURRENT_ADDRESS;
     else if (transport_peers_valid(&st->peers))
        quality=LINK_QUALITY_DOWN_STALE_ADDRESS;
@@ -1377,29 +1451,46 @@ static void enter_state_run(struct site *st)
     memset(st->dhsecret,0,st->dh->len);
     memset(st->sharedsecret,0,st->sharedsecretlen);
     set_link_quality(st);
+
+    if (st->keepalive && !current_valid(st))
+       initiate_key_setup(st, "keepalive", 0);
 }
 
 static bool_t ensure_resolving(struct site *st)
 {
     /* Reentrancy hazard: may call site_resolve_callback and hence
      * enter_new_state, enter_state_* and generate_msg*. */
-    if (st->resolving)
+    if (st->resolving_count)
         return True;
 
-    assert(st->address);
+    assert(st->addresses);
 
     /* resolver->request might reentrantly call site_resolve_callback
-     * which will clear st->resolving, so we need to set it beforehand
-     * rather than afterwards; also, it might return False, in which
-     * case we have to clear ->resolving again. */
-    st->resolving=True;
-    bool_t ok = st->resolver->request(st->resolver->st,st->address,
-                                     st->remoteport,st->comms[0],
-                                     site_resolve_callback,st);
-    if (!ok)
-       st->resolving=False;
-
-    return ok;
+     * which will decrement st->resolving, so we need to increment it
+     * twice beforehand to prevent decrement from thinking we're
+     * finished, and decrement it ourselves.  Alternatively if
+     * everything fails then there are no callbacks due and we simply
+     * set it to 0 and return false.. */
+    st->resolving_n_results_stored=0;
+    st->resolving_n_results_all=0;
+    st->resolving_count+=2;
+    const char **addrp=st->addresses;
+    const char *address;
+    bool_t anyok=False;
+    for (; (address=*addrp++); ) {
+       bool_t ok = st->resolver->request(st->resolver->st,address,
+                                         st->remoteport,st->comms[0],
+                                         site_resolve_callback,st);
+       if (ok)
+           st->resolving_count++;
+       anyok|=ok;
+    }
+    if (!anyok) {
+       st->resolving_count=0;
+       return False;
+    }
+    decrement_resolving_count(st,2);
+    return True;
 }
 
 static bool_t enter_state_resolve(struct site *st)
@@ -1536,8 +1627,8 @@ static void generate_send_prod(struct site *st,
     slog(st,LOG_SETUP_INIT,"prodding peer for key exchange");
     st->allow_send_prod=0;
     generate_prod(st,&st->scratch);
-    dump_packet(st,&st->scratch,source,False);
-    source->comm->sendmsg(source->comm->st, &st->scratch, source);
+    bool_t ok = comm_addr_sendmsg(st, source, &st->scratch);
+    dump_packet(st,&st->scratch,source,False,ok);
 }
 
 static inline void site_settimeout(uint64_t timeout, int *timeout_io)
@@ -1556,7 +1647,7 @@ static int site_beforepoll(void *sst, struct pollfd *fds, int *nfds_io,
 {
     struct site *st=sst;
 
-    *nfds_io=0; /* We don't use any file descriptors */
+    BEFOREPOLL_WANT_FDS(0); /* We don't use any file descriptors */
     st->now=*now;
 
     /* Work out when our next timeout is. The earlier of 'timeout' or
@@ -1670,7 +1761,7 @@ static bool_t site_incoming(void *sst, struct buffer_if *buf,
        if (!named_for_us(st,buf,msgtype,&named_msg))
            return False;
        /* It's a MSG1 addressed to us. Decide what to do about it. */
-       dump_packet(st,buf,source,True);
+       dump_packet(st,buf,source,True,True);
        if (st->state==SITE_RUN || st->state==SITE_RESOLVE ||
            st->state==SITE_WAIT) {
            /* We should definitely process it */
@@ -1678,7 +1769,7 @@ static bool_t site_incoming(void *sst, struct buffer_if *buf,
            if (process_msg1(st,buf,source,&named_msg)) {
                slog(st,LOG_SETUP_INIT,"key setup initiated by peer");
                bool_t entered=enter_new_state(st,SITE_SENTMSG2);
-               if (entered && st->address && st->local_mobile)
+               if (entered && st->addresses && st->local_mobile)
                    /* We must do this as the very last thing, because
                       the resolver callback might reenter us. */
                    ensure_resolving(st);
@@ -1720,7 +1811,7 @@ static bool_t site_incoming(void *sst, struct buffer_if *buf,
     if (msgtype==LABEL_PROD) {
        if (!named_for_us(st,buf,msgtype,&named_msg))
            return False;
-       dump_packet(st,buf,source,True);
+       dump_packet(st,buf,source,True,True);
        if (st->state!=SITE_RUN) {
            slog(st,LOG_DROP,"ignoring PROD when not in state RUN");
        } else if (current_valid(st)) {
@@ -1733,7 +1824,7 @@ static bool_t site_incoming(void *sst, struct buffer_if *buf,
     }
     if (dest==st->index) {
        /* Explicitly addressed to us */
-       if (msgtype!=LABEL_MSG0) dump_packet(st,buf,source,True);
+       if (msgtype!=LABEL_MSG0) dump_packet(st,buf,source,True,True);
        switch (msgtype) {
        case LABEL_NAK:
            /* If the source is our current peer then initiate a key setup,
@@ -1856,6 +1947,23 @@ static void site_phase_hook(void *sst, uint32_t newphase)
     send_msg7(st,"shutting down");
 }
 
+static void site_childpersist_clearkeys(void *sst, uint32_t newphase)
+{
+    struct site *st=sst;
+    dispose_transform(&st->current.transform);
+    dispose_transform(&st->auxiliary_key.transform);
+    dispose_transform(&st->new_transform);
+    /* Not much point overwiting the signing key, since we loaded it
+       from disk, and it is only valid prospectively if at all,
+       anyway. */
+    /* XXX it would be best to overwrite the DH state, because that
+       _is_ relevant to forward secrecy.  However we have no
+       convenient interface for doing that and in practice gmp has
+       probably dribbled droppings all over the malloc arena.  A good
+       way to fix this would be to have a privsep child for asymmetric
+       crypto operations, but that's a task for another day. */
+}
+
 static list_t *site_apply(closure_t *self, struct cloc loc, dict_t *context,
                          list_t *args)
 {
@@ -1865,7 +1973,7 @@ static list_t *site_apply(closure_t *self, struct cloc loc, dict_t *context,
     dict_t *dict;
     int i;
 
-    st=safe_malloc(sizeof(*st),"site_apply");
+    NEW(st);
 
     st->cl.description="site";
     st->cl.type=CL_SITE;
@@ -1884,6 +1992,8 @@ static list_t *site_apply(closure_t *self, struct cloc loc, dict_t *context,
     st->localname=dict_read_string(dict, "local-name", True, "site", loc);
     st->remotename=dict_read_string(dict, "name", True, "site", loc);
 
+    st->keepalive=dict_read_bool(dict,"keepalive",False,"site",loc,False);
+
     st->peer_mobile=dict_read_bool(dict,"mobile",False,"site",loc,False);
     st->local_mobile=
        dict_read_bool(dict,"local-mobile",False,"site",loc,False);
@@ -1918,7 +2028,7 @@ static list_t *site_apply(closure_t *self, struct cloc loc, dict_t *context,
     if (!things##_cfg)                                                 \
        cfgfatal(loc,"site","closure list \"%s\" not found\n",dictkey); \
     st->nthings=list_length(things##_cfg);                             \
-    st->things=safe_malloc_ary(sizeof(*st->things),st->nthings,dictkey "s"); \
+    NEW_ARY(st->things,st->nthings);                                   \
     assert(st->nthings);                                               \
     for (i=0; i<st->nthings; i++) {                                    \
        item_t *item=list_elem(things##_cfg,i);                         \
@@ -1933,13 +2043,21 @@ static list_t *site_apply(closure_t *self, struct cloc loc, dict_t *context,
 
     GET_CLOSURE_LIST("comm",comms,ncomms,CL_COMM);
 
+    NEW_ARY(st->commclientinfos, st->ncomms);
+    dict_t *comminfo = dict_read_dict(dict,"comm-info",False,"site",loc);
+    for (i=0; i<st->ncomms; i++) {
+       st->commclientinfos[i] =
+           !comminfo ? 0 :
+           st->comms[i]->clientinfo(st->comms[i],comminfo,loc);
+    }
+
     st->resolver=find_cl_if(dict,"resolver",CL_RESOLVER,True,"site",loc);
     st->log=find_cl_if(dict,"log",CL_LOG,True,"site",loc);
     st->random=find_cl_if(dict,"random",CL_RANDOMSRC,True,"site",loc);
 
     st->privkey=find_cl_if(dict,"local-key",CL_RSAPRIVKEY,True,"site",loc);
-    st->address=dict_read_string(dict, "address", False, "site", loc);
-    if (st->address)
+    st->addresses=dict_read_string_array(dict,"address",False,"site",loc,0);
+    if (st->addresses)
        st->remoteport=dict_read_number(dict,"port",True,"site",loc,0);
     else st->remoteport=0;
     st->pubkey=find_cl_if(dict,"key",CL_RSAPUBKEY,True,"site",loc);
@@ -1965,7 +2083,7 @@ static list_t *site_apply(closure_t *self, struct cloc loc, dict_t *context,
     const char *peerskey= st->peer_mobile
        ? "mobile-peers-max" : "static-peers-max";
     st->transport_peers_max= dict_read_number(
-       dict,peerskey,False,"site",loc, st->address ? 4 : 3);
+       dict,peerskey,False,"site",loc, st->addresses ? 4 : 3);
     if (st->transport_peers_max<1 ||
        st->transport_peers_max>MAX_PEER_ADDRS) {
        cfgfatal(loc,"site", "%s must be in range 1.."
@@ -1986,7 +2104,7 @@ static list_t *site_apply(closure_t *self, struct cloc loc, dict_t *context,
     st->log_events=string_list_to_word(dict_lookup(dict,"log-events"),
                                       log_event_table,"site");
 
-    st->resolving=False;
+    st->resolving_count=0;
     st->allow_send_prod=0;
 
     st->tunname=safe_malloc(strlen(st->localname)+strlen(st->remotename)+5,
@@ -2005,7 +2123,7 @@ static list_t *site_apply(closure_t *self, struct cloc loc, dict_t *context,
 
     /* We are interested in poll(), but only for timeouts. We don't have
        any fds of our own. */
-    register_for_poll(st, site_beforepoll, site_afterpoll, 0, "site");
+    register_for_poll(st, site_beforepoll, site_afterpoll, "site");
     st->timeout=0;
 
     st->remote_capabilities=0;
@@ -2045,6 +2163,7 @@ static list_t *site_apply(closure_t *self, struct cloc loc, dict_t *context,
     enter_state_stop(st);
 
     add_hook(PHASE_SHUTDOWN,site_phase_hook,st);
+    add_hook(PHASE_CHILDPERSIST,site_childpersist_clearkeys,st);
 
     return new_closure(&st->cl);
 }
@@ -2265,10 +2384,9 @@ void transport_xmit(struct site *st, transport_peers *peers,
     int nfailed=0;
     for (slot=0; slot<peers->npeers; slot++) {
        transport_peer *peer=&peers->peers[slot];
+       bool_t ok = comm_addr_sendmsg(st, &peer->addr, buf);
        if (candebug)
-           dump_packet(st, buf, &peer->addr, False);
-       bool_t ok =
-           peer->addr.comm->sendmsg(peer->addr.comm->st, buf, &peer->addr);
+           dump_packet(st, buf, &peer->addr, False, ok);
        if (!ok) {
            failed |= 1U << slot;
            nfailed++;
@@ -2290,12 +2408,14 @@ void transport_xmit(struct site *st, transport_peers *peers,
            transport_peers__copy_by_mask(peers->peers,&wslot,~failed,peers);
            assert(wslot+nfailed == peers->npeers);
            COPY_ARRAY(peers->peers+wslot, failedpeers, nfailed);
+           transport_peers_debug(st,peers,"mobile failure reorder",0,0,0);
        }
     } else {
        if (failed && peers->npeers > 1) {
            int wslot=0;
            transport_peers__copy_by_mask(peers->peers,&wslot,~failed,peers);
            peers->npeers=wslot;
+           transport_peers_debug(st,peers,"non-mobile failure cleanup",0,0,0);
        }
     }
 }