chiark / gitweb /
MacOS X support
[secnet.git] / netlink.c
index 4426a7c3575d31f79d1a8293a135d5cfe9d9cd02..cc954fd748bd7d8677cd2487987aac8f84f434c4 100644 (file)
--- a/netlink.c
+++ b/netlink.c
 /* User-kernel network link */
 
-/* Each netlink device is actually a router, with its own IP address.
-   We do things like decreasing the TTL and recalculating the header
-   checksum, generating ICMP, responding to pings, etc. */
+/* See RFCs 791, 792, 1123 and 1812 */
 
-/* This is where we have the anti-spoofing paranoia - before sending a
-   packet to the kernel we check that the tunnel it came over could
-   reasonably have produced it. */
+/* The netlink device is actually a router.  Tunnels are unnumbered
+   point-to-point lines (RFC1812 section 2.2.7); the router has a
+   single address (the 'router-id'). */
 
+/* This is where we currently have the anti-spoofing paranoia - before
+   sending a packet to the kernel we check that the tunnel it came
+   over could reasonably have produced it. */
+
+
+/* Points to note from RFC1812 (which may require changes in this
+   file):
+
+3.3.4 Maximum Transmission Unit - MTU
+
+   The MTU of each logical interface MUST be configurable within the
+   range of legal MTUs for the interface.
+
+   Many Link Layer protocols define a maximum frame size that may be
+   sent.  In such cases, a router MUST NOT allow an MTU to be set which
+   would allow sending of frames larger than those allowed by the Link
+   Layer protocol.  However, a router SHOULD be willing to receive a
+   packet as large as the maximum frame size even if that is larger than
+   the MTU.
+
+4.2.1  A router SHOULD count datagrams discarded.
+
+4.2.2.1 Source route options - we probably should implement processing
+of source routes, even though mostly the security policy will prevent
+their use.
+
+5.3.13.4 Source Route Options
+
+   A router MUST implement support for source route options in forwarded
+   packets.  A router MAY implement a configuration option that, when
+   enabled, causes all source-routed packets to be discarded.  However,
+   such an option MUST NOT be enabled by default.
+
+5.3.13.5 Record Route Option
+
+   Routers MUST support the Record Route option in forwarded packets.
+
+   A router MAY provide a configuration option that, if enabled, will
+   cause the router to ignore (i.e., pass through unchanged) Record
+   Route options in forwarded packets.  If provided, such an option MUST
+   default to enabling the record-route.  This option should not affect
+   the processing of Record Route options in datagrams received by the
+   router itself (in particular, Record Route options in ICMP echo
+   requests will still be processed according to Section [4.3.3.6]).
+
+5.3.13.6 Timestamp Option
+
+   Routers MUST support the timestamp option in forwarded packets.  A
+   timestamp value MUST follow the rules given [INTRO:2].
+
+   If the flags field = 3 (timestamp and prespecified address), the
+   router MUST add its timestamp if the next prespecified address
+   matches any of the router's IP addresses.  It is not necessary that
+   the prespecified address be either the address of the interface on
+   which the packet arrived or the address of the interface over which
+   it will be sent.
+
+
+4.2.2.7 Fragmentation: RFC 791 Section 3.2
+
+   Fragmentation, as described in [INTERNET:1], MUST be supported by a
+   router.
+
+4.2.2.8 Reassembly: RFC 791 Section 3.2
+
+   As specified in the corresponding section of [INTRO:2], a router MUST
+   support reassembly of datagrams that it delivers to itself.
+
+4.2.2.9 Time to Live: RFC 791 Section 3.2
+
+   Note in particular that a router MUST NOT check the TTL of a packet
+   except when forwarding it.
+
+   A router MUST NOT discard a datagram just because it was received
+   with TTL equal to zero or one; if it is to the router and otherwise
+   valid, the router MUST attempt to receive it.
+
+   On messages the router originates, the IP layer MUST provide a means
+   for the transport layer to set the TTL field of every datagram that
+   is sent.  When a fixed TTL value is used, it MUST be configurable.
+
+
+8.1 The Simple Network Management Protocol - SNMP
+8.1.1 SNMP Protocol Elements
+
+   Routers MUST be manageable by SNMP [MGT:3].  The SNMP MUST operate
+   using UDP/IP as its transport and network protocols.
+
+
+*/
+
+#include <string.h>
 #include "secnet.h"
 #include "util.h"
 #include "ipaddr.h"
 #define OPT_SOFTROUTE   1
 #define OPT_ALLOWROUTE  2
 
+#define ICMP_TYPE_ECHO_REPLY             0
+
+#define ICMP_TYPE_UNREACHABLE            3
+#define ICMP_CODE_NET_UNREACHABLE        0
+#define ICMP_CODE_PROTOCOL_UNREACHABLE   2
+#define ICMP_CODE_FRAGMENTATION_REQUIRED 4
+#define ICMP_CODE_NET_PROHIBITED        13
+
+#define ICMP_TYPE_ECHO_REQUEST           8
+
+#define ICMP_TYPE_TIME_EXCEEDED         11
+#define ICMP_CODE_TTL_EXCEEDED           0
+
 /* Generic IP checksum routine */
 static inline uint16_t ip_csum(uint8_t *iph,uint32_t count)
 {
@@ -45,30 +148,30 @@ static inline uint16_t ip_csum(uint8_t *iph,uint32_t count)
 static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl) {
     uint32_t sum;
 
-    __asm__ __volatile__("
-            movl (%1), %0
-            subl $4, %2
-            jbe 2f
-            addl 4(%1), %0
-            adcl 8(%1), %0
-            adcl 12(%1), %0
-1:          adcl 16(%1), %0
-            lea 4(%1), %1
-            decl %2
-            jne 1b
-            adcl $0, %0
-            movl %0, %2
-            shrl $16, %0
-            addw %w2, %w0
-            adcl $0, %0
-            notl %0
-2:
-            "
+    __asm__ __volatile__(
+            "movl (%1), %0      ;\n"
+            "subl $4, %2        ;\n"
+            "jbe 2f             ;\n"
+            "addl 4(%1), %0     ;\n"
+            "adcl 8(%1), %0     ;\n"
+            "adcl 12(%1), %0    ;\n"
+"1:         adcl 16(%1), %0     ;\n"
+            "lea 4(%1), %1      ;\n"
+            "decl %2            ;\n"
+            "jne 1b             ;\n"
+            "adcl $0, %0        ;\n"
+            "movl %0, %2        ;\n"
+            "shrl $16, %0       ;\n"
+            "addw %w2, %w0      ;\n"
+            "adcl $0, %0        ;\n"
+            "notl %0            ;\n"
+"2:                             ;\n"
         /* Since the input registers which are loaded with iph and ihl
            are modified, we must also specify them as outputs, or gcc
            will assume they contain their original values. */
         : "=r" (sum), "=r" (iph), "=r" (ihl)
-        : "1" (iph), "2" (ihl));
+        : "1" (iph), "2" (ihl)
+       : "memory");
     return sum;
 }
 #else
@@ -122,6 +225,13 @@ static void netlink_packet_deliver(struct netlink *st,
                                   struct netlink_client *client,
                                   struct buffer_if *buf);
 
+/* XXX RFC1812 4.3.2.5:
+   All other ICMP error messages (Destination Unreachable,
+   Redirect, Time Exceeded, and Parameter Problem) SHOULD have their
+   precedence value set to 6 (INTERNETWORK CONTROL) or 7 (NETWORK
+   CONTROL).  The IP Precedence value for these error messages MAY be
+   settable.
+   */
 static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
                                         uint32_t dest,uint16_t len)
 {
@@ -137,7 +247,7 @@ static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
     h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
     h->iph.id=0;
     h->iph.frag_off=0;
-    h->iph.ttl=255;
+    h->iph.ttl=255; /* XXX should be configurable */
     h->iph.protocol=1;
     h->iph.saddr=htonl(st->secnet_address);
     h->iph.daddr=htonl(dest);
@@ -206,6 +316,26 @@ static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
 
 /* How much of the original IP packet do we include in its ICMP
    response? The header plus up to 64 bits. */
+
+/* XXX TODO RFC1812:
+4.3.2.3 Original Message Header
+
+   Historically, every ICMP error message has included the Internet
+   header and at least the first 8 data bytes of the datagram that
+   triggered the error.  This is no longer adequate, due to the use of
+   IP-in-IP tunneling and other technologies.  Therefore, the ICMP
+   datagram SHOULD contain as much of the original datagram as possible
+   without the length of the ICMP datagram exceeding 576 bytes.  The
+   returned IP header (and user data) MUST be identical to that which
+   was received, except that the router is not required to undo any
+   modifications to the IP header that are normally performed in
+   forwarding that were performed before the error was detected (e.g.,
+   decrementing the TTL, or updating options).  Note that the
+   requirements of Section [4.3.3.5] supersede this requirement in some
+   cases (i.e., for a Parameter Problem message, if the problem is in a
+   modified field, the router must undo the modification).  See Section
+   [4.3.3.5]).
+   */
 static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
 {
     struct iphdr *iph=(struct iphdr *)buf->start;
@@ -242,6 +372,7 @@ static void netlink_icmp_simple(struct netlink *st, struct buffer_if *buf,
 /*
  * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
  * checksum.
+ * RFC1812: 4.2.2.5 MUST discard messages containing invalid checksums.
  *
  * Is the datagram acceptable?
  *
@@ -266,7 +397,8 @@ static bool_t netlink_check(struct netlink *st, struct buffer_if *buf)
 }
 
 /* Deliver a packet. "client" is the _origin_ of the packet, not its
-destination.  */
+   destination, and is NULL for packets from the host and packets
+   generated internally in secnet.  */
 static void netlink_packet_deliver(struct netlink *st,
                                   struct netlink_client *client,
                                   struct buffer_if *buf)
@@ -283,12 +415,12 @@ static void netlink_packet_deliver(struct netlink *st,
     BUF_ASSERT_USED(buf);
 
     if (dest==st->secnet_address) {
-       Message(M_ERR,"%s: trying to deliver a packet to myself!\n");
+       Message(M_ERR,"%s: trying to deliver a packet to myself!\n",st->name);
        BUF_FREE(buf);
        return;
     }
     
-    /* Packets from the host (client==NULL) will always be routed.  Packets
+    /* Packets from the host (client==NULL) may always be routed.  Packets
        from clients with the allow_route option will also be routed. */
     if (!client || (client && (client->options & OPT_ALLOWROUTE)))
        allow_route=True;
@@ -301,8 +433,9 @@ static void netlink_packet_deliver(struct netlink *st,
 
     best_quality=0;
     best_match=-1;
-    for (i=0; i<st->n_routes; i++) {
-       if (st->routes[i].up && subnet_match(&st->routes[i].net,dest)) {
+    for (i=0; i<st->n_clients; i++) {
+       if (st->routes[i]->up &&
+           ipset_contains_addr(st->routes[i]->networks,dest)) {
            /* It's an available route to the correct destination. But is
               it better than the one we already have? */
 
@@ -310,14 +443,14 @@ static void netlink_packet_deliver(struct netlink *st,
               bother looking at routes we're not allowed to use.  If
               we don't yet have an allowed route we'll consider any.  */
            if (!allow_route && found_allowed) {
-               if (!(st->routes[i].c->options&OPT_ALLOWROUTE)) continue;
+               if (!(st->routes[i]->options&OPT_ALLOWROUTE)) continue;
            }
            
-           if (st->routes[i].c->link_quality>best_quality
+           if (st->routes[i]->link_quality>best_quality
                || best_quality==0) {
-               best_quality=st->routes[i].c->link_quality;
+               best_quality=st->routes[i]->link_quality;
                best_match=i;
-               if (st->routes[i].c->options&OPT_ALLOWROUTE)
+               if (st->routes[i]->options&OPT_ALLOWROUTE)
                    found_allowed=True;
                /* If quality isn't perfect we may wish to
                   consider kicking the tunnel with a 0-length
@@ -334,7 +467,7 @@ static void netlink_packet_deliver(struct netlink *st,
     if (best_match==-1) {
        /* The packet's not going down a tunnel.  It might (ought to)
           be for the host.   */
-       if (subnet_matches_list(&st->networks,dest)) {
+       if (ipset_contains_addr(st->networks,dest)) {
            st->deliver_to_host(st->dst,buf);
            st->outcount++;
            BUF_ASSERT_FREE(buf);
@@ -342,15 +475,16 @@ static void netlink_packet_deliver(struct netlink *st,
            string_t s,d;
            s=ipaddr_to_string(source);
            d=ipaddr_to_string(dest);
-           Message(M_ERR,"%s: don't know where to deliver packet "
+           Message(M_DEBUG,"%s: don't know where to deliver packet "
                    "(s=%s, d=%s)\n", st->name, s, d);
            free(s); free(d);
-           netlink_icmp_simple(st,buf,client,3,0);
+           netlink_icmp_simple(st,buf,client,ICMP_TYPE_UNREACHABLE,
+                               ICMP_CODE_NET_UNREACHABLE);
            BUF_FREE(buf);
        }
     } else {
        if (!allow_route &&
-           !(st->routes[best_match].c->options&OPT_ALLOWROUTE)) {
+           !(st->routes[best_match]->options&OPT_ALLOWROUTE)) {
            string_t s,d;
            s=ipaddr_to_string(source);
            d=ipaddr_to_string(dest);
@@ -361,18 +495,22 @@ static void netlink_packet_deliver(struct netlink *st,
                    st->name,s,d);
            free(s); free(d);
                    
-           netlink_icmp_simple(st,buf,client,3,9);
+           netlink_icmp_simple(st,buf,client,ICMP_TYPE_UNREACHABLE,
+                               ICMP_CODE_NET_PROHIBITED);
            BUF_FREE(buf);
-       }
-       if (best_quality>0) {
-           st->routes[best_match].c->deliver(
-               st->routes[best_match].c->dst, buf);
-           st->routes[best_match].outcount++;
-           BUF_ASSERT_FREE(buf);
        } else {
-           /* Generate ICMP destination unreachable */
-           netlink_icmp_simple(st,buf,client,3,0); /* client==NULL */
-           BUF_FREE(buf);
+           if (best_quality>0) {
+               /* XXX Fragment if required */
+               st->routes[best_match]->deliver(
+                   st->routes[best_match]->dst, buf);
+               st->routes[best_match]->outcount++;
+               BUF_ASSERT_FREE(buf);
+           } else {
+               /* Generate ICMP destination unreachable */
+               netlink_icmp_simple(st,buf,client,ICMP_TYPE_UNREACHABLE,
+                                   ICMP_CODE_NET_UNREACHABLE); /* client==NULL */
+               BUF_FREE(buf);
+           }
        }
     }
     BUF_ASSERT_FREE(buf);
@@ -389,7 +527,8 @@ static void netlink_packet_forward(struct netlink *st,
     /* Packet has already been checked */
     if (iph->ttl<=1) {
        /* Generate ICMP time exceeded */
-       netlink_icmp_simple(st,buf,client,11,0);
+       netlink_icmp_simple(st,buf,client,ICMP_TYPE_TIME_EXCEEDED,
+                           ICMP_CODE_TTL_EXCEEDED);
        BUF_FREE(buf);
        return;
     }
@@ -421,13 +560,13 @@ static void netlink_packet_local(struct netlink *st,
 
     if (h->iph.protocol==1) {
        /* It's ICMP */
-       if (h->type==8 && h->code==0) {
+       if (h->type==ICMP_TYPE_ECHO_REQUEST && h->code==0) {
            /* ICMP echo-request. Special case: we re-use the buffer
               to construct the reply. */
-           h->type=0;
+           h->type=ICMP_TYPE_ECHO_REPLY;
            h->iph.daddr=h->iph.saddr;
            h->iph.saddr=htonl(st->secnet_address);
-           h->iph.ttl=255; /* Be nice and bump it up again... */
+           h->iph.ttl=255;
            h->iph.check=0;
            h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
            netlink_icmp_csum(h);
@@ -437,7 +576,8 @@ static void netlink_packet_local(struct netlink *st,
        Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
     } else {
        /* Send ICMP protocol unreachable */
-       netlink_icmp_simple(st,buf,client,3,2);
+       netlink_icmp_simple(st,buf,client,ICMP_TYPE_UNREACHABLE,
+                           ICMP_CODE_PROTOCOL_UNREACHABLE);
        BUF_FREE(buf);
        return;
     }
@@ -465,11 +605,13 @@ static void netlink_incoming(struct netlink *st, struct netlink_client *client,
     source=ntohl(iph->saddr);
     dest=ntohl(iph->daddr);
 
-    /* Check source */
+    /* Check source. If we don't like the source, there's no point
+       generating ICMP because we won't know how to get it to the
+       source of the packet. */
     if (client) {
        /* Check that the packet source is appropriate for the tunnel
           it came down */
-       if (!subnet_matches_list(&client->networks,source)) {
+       if (!ipset_contains_addr(client->networks,source)) {
            string_t s,d;
            s=ipaddr_to_string(source);
            d=ipaddr_to_string(dest);
@@ -483,7 +625,7 @@ static void netlink_incoming(struct netlink *st, struct netlink_client *client,
        /* Check that the packet originates in our configured local
           network, and hasn't been forwarded from elsewhere or
           generated with the wrong source address */
-       if (!subnet_matches_list(&st->networks,source)) {
+       if (!ipset_contains_addr(st->networks,source)) {
            string_t s,d;
            s=ipaddr_to_string(source);
            d=ipaddr_to_string(dest);
@@ -495,9 +637,11 @@ static void netlink_incoming(struct netlink *st, struct netlink_client *client,
        }
     }
 
-    /* If this is a point-to-point device we don't examine the packet at
-       all; we blindly send it down our one-and-only registered tunnel,
-       or to the host, depending on where it came from. */
+    /* If this is a point-to-point device we don't examine the
+       destination address at all; we blindly send it down our
+       one-and-only registered tunnel, or to the host, depending on
+       where it came from.  It's up to external software to check
+       address validity and generate ICMP, etc. */
     if (st->ptp) {
        if (client) {
            st->deliver_to_host(st->dst,buf);
@@ -508,27 +652,13 @@ static void netlink_incoming(struct netlink *st, struct netlink_client *client,
        return;
     }
 
-    /* (st->secnet_address needs checking before matching destination
-       addresses) */
+    /* st->secnet_address needs checking before matching destination
+       addresses */
     if (dest==st->secnet_address) {
        netlink_packet_local(st,client,buf);
        BUF_ASSERT_FREE(buf);
        return;
     }
-    if (client) {
-       /* Check for free routing */
-       if (!subnet_matches_list(&st->networks,dest)) {
-           string_t s,d;
-           s=ipaddr_to_string(source);
-           d=ipaddr_to_string(dest);
-           Message(M_WARNING,"%s: incoming packet from tunnel %s "
-                   "with bad destination address "
-                   "(s=%s,d=%s)\n",st->name,client->name,s,d);
-           free(s); free(d);
-           BUF_FREE(buf);
-           return;
-       }
-    }
     netlink_packet_forward(st,client,buf);
     BUF_ASSERT_FREE(buf);
 }
@@ -548,33 +678,28 @@ static void netlink_dev_incoming(void *sst, struct buffer_if *buf)
     netlink_incoming(st,NULL,buf);
 }
 
-static void netlink_set_softlinks(struct netlink *st, struct netlink_client *c,
-                                 bool_t up, uint32_t quality)
+static void netlink_set_quality(void *sst, uint32_t quality)
 {
-    uint32_t i;
+    struct netlink_client *c=sst;
+    struct netlink *st=c->nst;
 
-    if (!st->routes) return; /* Table has not yet been created */
-    for (i=0; i<st->n_routes; i++) {
-       if (st->routes[i].c==c) {
-           st->routes[i].quality=quality;
-           if (!st->routes[i].hard) {
-               st->routes[i].up=up;
-               st->set_route(st->dst,&st->routes[i]);
-           }
-       }
+    c->link_quality=quality;
+    c->up=(c->link_quality==LINK_QUALITY_DOWN)?False:True;
+    if (c->options&OPT_SOFTROUTE) {
+       st->set_routes(st->dst,c);
     }
 }
 
-static void netlink_set_quality(void *sst, uint32_t quality)
+static void netlink_output_subnets(struct netlink *st, uint32_t loglevel,
+                                  struct subnet_list *snets)
 {
-    struct netlink_client *c=sst;
-    struct netlink *st=c->nst;
+    uint32_t i;
+    string_t net;
 
-    c->link_quality=quality;
-    if (c->link_quality==LINK_QUALITY_DOWN) {
-       netlink_set_softlinks(st,c,False,c->link_quality);
-    } else {
-       netlink_set_softlinks(st,c,True,c->link_quality);
+    for (i=0; i<snets->entries; i++) {
+       net=subnet_to_string(snets->list[i]);
+       Message(loglevel,"%s ",net);
+       free(net);
     }
 }
 
@@ -590,44 +715,45 @@ static void netlink_dump_routes(struct netlink *st, bool_t requested)
        Message(c,"%s: point-to-point (remote end is %s); routes:\n",
                st->name, net);
        free(net);
-       for (i=0; i<st->n_routes; i++) {
-           net=subnet_to_string(&st->routes[i].net);
-           Message(c,"%s ",net);
-           free(net);
-       }
+       netlink_output_subnets(st,c,st->clients->subnets);
        Message(c,"\n");
     } else {
        Message(c,"%s: routing table:\n",st->name);
-       for (i=0; i<st->n_routes; i++) {
-           net=subnet_to_string(&st->routes[i].net);
-           Message(c,"%s -> tunnel %s (%s,%s route,%s,quality %d,use %d)\n",net,
-                   st->routes[i].c->name,
-                   st->routes[i].hard?"hard":"soft",
-                   st->routes[i].allow_route?"free":"restricted",
-                   st->routes[i].up?"up":"down",
-                   st->routes[i].quality,
-                   st->routes[i].outcount);
-           free(net);
+       for (i=0; i<st->n_clients; i++) {
+           netlink_output_subnets(st,c,st->routes[i]->subnets);
+           Message(c,"-> tunnel %s (%s,mtu %d,%s routes,%s,"
+                   "quality %d,use %d,pri %lu)\n",
+                   st->routes[i]->name,
+                   st->routes[i]->up?"up":"down",
+                   st->routes[i]->mtu,
+                   st->routes[i]->options&OPT_SOFTROUTE?"soft":"hard",
+                   st->routes[i]->options&OPT_ALLOWROUTE?"free":"restricted",
+                   st->routes[i]->link_quality,
+                   st->routes[i]->outcount,
+                   (unsigned long)st->routes[i]->priority);
        }
        net=ipaddr_to_string(st->secnet_address);
        Message(c,"%s/32 -> netlink \"%s\" (use %d)\n",
                net,st->name,st->localcount);
        free(net);
-       for (i=0; i<st->networks.entries; i++) {
-           net=subnet_to_string(&st->networks.list[i]);
-           Message(c,"%s -> host (use %d)\n",net,st->outcount);
+       for (i=0; i<st->subnets->entries; i++) {
+           net=subnet_to_string(st->subnets->list[i]);
+           Message(c,"%s ",net);
            free(net);
        }
+       if (i>0)
+           Message(c,"-> host (use %d)\n",st->outcount);
     }
 }
 
-static int netlink_compare_route_specificity(const void *ap, const void *bp)
+/* ap and bp each point to an element of the routes array (a client pointer) */
+static int netlink_compare_client_priority(const void *ap, const void *bp)
 {
-    const struct netlink_route *a=ap;
-    const struct netlink_route *b=bp;
+    const struct netlink_client *const*a=ap;
+    const struct netlink_client *const*b=bp;
 
-    if (a->net.len==b->net.len) return 0;
-    if (a->net.len<b->net.len) return 1;
+    if ((*a)->priority==(*b)->priority) return 0;
+    if ((*a)->priority<(*b)->priority) return 1;
     return -1;
 }
 
@@ -635,42 +761,20 @@ static void netlink_phase_hook(void *sst, uint32_t new_phase)
 {
     struct netlink *st=sst;
     struct netlink_client *c;
-    uint32_t i,j;
+    uint32_t i;
 
     /* All the networks serviced by the various tunnels should now
      * have been registered.  We build a routing table by sorting the
-     * routes into most-specific-first order.  */
-    st->routes=safe_malloc(st->n_routes*sizeof(*st->routes),
+     * clients by priority.  */
+    st->routes=safe_malloc(st->n_clients*sizeof(*st->routes),
                           "netlink_phase_hook");
     /* Fill the table */
     i=0;
-    for (c=st->clients; c; c=c->next) {
-       for (j=0; j<c->networks.entries; j++) {
-           st->routes[i].net=c->networks.list[j];
-           st->routes[i].c=c;
-           /* Hard routes are always up;
-              soft routes default to down; routes with no 'deliver' function
-              default to down */
-           st->routes[i].up=c->deliver?
-               (c->options&OPT_SOFTROUTE?False:True):
-               False;
-           st->routes[i].kup=False;
-           st->routes[i].hard=c->options&OPT_SOFTROUTE?False:True;
-           st->routes[i].allow_route=c->options&OPT_ALLOWROUTE?
-               True:False;
-           st->routes[i].quality=c->link_quality;
-           st->routes[i].outcount=0;
-           i++;
-       }
-    }
-    /* ASSERT i==st->n_routes */
-    if (i!=st->n_routes) {
-       fatal("netlink: route count error: expected %d got %d\n",
-             st->n_routes,i);
-    }
-    /* Sort the table in descending order of specificity */
-    qsort(st->routes,st->n_routes,sizeof(*st->routes),
-         netlink_compare_route_specificity);
+    for (c=st->clients; c; c=c->next)
+       st->routes[i++]=c;
+    /* Sort the table in descending order of priority */
+    qsort(st->routes,st->n_clients,sizeof(*st->routes),
+         netlink_compare_client_priority);
 
     netlink_dump_routes(st,False);
 }
@@ -682,6 +786,34 @@ static void netlink_signal_handler(void *sst, int signum)
     netlink_dump_routes(st,True);
 }
 
+static void netlink_inst_output_config(void *sst, struct buffer_if *buf)
+{   /* Installed as the per-client ops.output_config callback. */
+/*    struct netlink_client *c=sst; */
+/*    struct netlink *st=c->nst; */
+
+    /* For now we don't output anything: buf is checked, never written. */
+    BUF_ASSERT_USED(buf);
+}
+
+static bool_t netlink_inst_check_config(void *sst, struct buffer_if *buf)
+{   /* Installed as the per-client ops.check_config callback. */
+/*    struct netlink_client *c=sst; */
+/*    struct netlink *st=c->nst; */
+
+    BUF_ASSERT_USED(buf);
+    /* We need to eat all of the configuration information from the buffer
+       for backward compatibility. */
+    buf->size=0; /* contents are not inspected; the size is just reset to 0 */
+    return True; /* unconditional: this function never rejects a config */
+}
+
+static void netlink_inst_set_mtu(void *sst, uint32_t new_mtu)
+{   /* Installed as the per-client ops.set_mtu callback. */
+    struct netlink_client *c=sst; /* sst is the client this callback belongs to */
+
+    c->mtu=new_mtu; /* replaces the MTU set in netlink_inst_create; no range check here */
+}
+
 static void netlink_inst_reg(void *sst, netlink_deliver_fn *deliver, 
                             void *dst, uint32_t max_start_pad,
                             uint32_t max_end_pad)
@@ -710,17 +842,23 @@ static closure_t *netlink_inst_create(struct netlink *st,
 {
     struct netlink_client *c;
     string_t name;
-    struct subnet_list networks;
-    uint32_t options;
+    struct ipset *networks;
+    uint32_t options,priority,mtu;
+    list_t *l;
 
     name=dict_read_string(dict, "name", True, st->name, loc);
 
-    dict_read_subnet_list(dict, "routes", True, st->name, loc,
-                         &networks);
+    l=dict_lookup(dict,"routes");
+    if (!l)
+       cfgfatal(loc,st->name,"required parameter \"routes\" not found\n");
+    networks=string_list_to_ipset(l,loc,st->name,"routes");
     options=string_list_to_word(dict_lookup(dict,"options"),
                                netlink_option_table,st->name);
 
-    if ((options&OPT_SOFTROUTE) && !st->set_route) {
+    priority=dict_read_number(dict,"priority",False,st->name,loc,0);
+    mtu=dict_read_number(dict,"mtu",False,st->name,loc,0);
+
+    if ((options&OPT_SOFTROUTE) && !st->set_routes) {
        cfgfatal(loc,st->name,"this netlink device does not support "
                 "soft routes.\n");
        return NULL;
@@ -738,11 +876,10 @@ static closure_t *netlink_inst_create(struct netlink *st,
        }
     }
 
-    /* Check that nets do not intersect st->exclude_remote_networks;
-       refuse to register if they do. */
-    if (subnet_lists_intersect(&st->exclude_remote_networks,&networks)) {
-       cfgfatal(loc,st->name,"networks intersect with the explicitly "
-                "excluded remote networks\n");
+    /* Check that nets are a subset of st->remote_networks;
+       refuse to register if they are not. */
+    if (!ipset_is_subset(st->remote_networks,networks)) {
+       cfgfatal(loc,st->name,"routes are not allowed\n");
        return NULL;
     }
 
@@ -755,17 +892,26 @@ static closure_t *netlink_inst_create(struct netlink *st,
     c->ops.reg=netlink_inst_reg;
     c->ops.deliver=netlink_inst_incoming;
     c->ops.set_quality=netlink_set_quality;
+    c->ops.output_config=netlink_inst_output_config;
+    c->ops.check_config=netlink_inst_check_config;
+    c->ops.set_mtu=netlink_inst_set_mtu;
     c->nst=st;
 
     c->networks=networks;
+    c->subnets=ipset_to_subnet_list(networks);
+    c->priority=priority;
     c->deliver=NULL;
     c->dst=NULL;
     c->name=name;
-    c->options=options;
     c->link_quality=LINK_QUALITY_DOWN;
+    c->mtu=mtu?mtu:st->mtu;
+    c->options=options;
+    c->outcount=0;
+    c->up=False;
+    c->kup=False;
     c->next=st->clients;
     st->clients=c;
-    st->n_routes+=networks.entries;
+    st->n_clients++;
 
     return &c->cl;
 }
@@ -779,8 +925,6 @@ static list_t *netlink_inst_apply(closure_t *self, struct cloc loc,
     item_t *item;
     closure_t *cl;
 
-    Message(M_DEBUG_CONFIG,"netlink_inst_apply\n");
-
     item=list_elem(args,0);
     if (!item || item->type!=t_dict) {
        cfgfatal(loc,st->name,"must have a dictionary argument\n");
@@ -794,11 +938,12 @@ static list_t *netlink_inst_apply(closure_t *self, struct cloc loc,
 
 netlink_deliver_fn *netlink_init(struct netlink *st,
                                 void *dst, struct cloc loc,
-                                dict_t *dict, string_t description,
-                                netlink_route_fn *set_route,
+                                dict_t *dict, cstring_t description,
+                                netlink_route_fn *set_routes,
                                 netlink_deliver_fn *to_host)
 {
     item_t *sa, *ptpa;
+    list_t *l;
 
     st->dst=dst;
     st->cl.description=description;
@@ -808,15 +953,33 @@ netlink_deliver_fn *netlink_init(struct netlink *st,
     st->max_start_pad=0;
     st->max_end_pad=0;
     st->clients=NULL;
-    st->set_route=set_route;
+    st->routes=NULL;
+    st->n_clients=0;
+    st->set_routes=set_routes;
     st->deliver_to_host=to_host;
 
-    st->name=dict_read_string(dict,"name",False,"netlink",loc);
+    st->name=dict_read_string(dict,"name",False,description,loc);
     if (!st->name) st->name=description;
-    dict_read_subnet_list(dict, "networks", True, "netlink", loc,
-                         &st->networks);
-    dict_read_subnet_list(dict, "exclude-remote-networks", False, "netlink",
-                         loc, &st->exclude_remote_networks);
+    l=dict_lookup(dict,"networks");
+    if (l) 
+       st->networks=string_list_to_ipset(l,loc,st->name,"networks");
+    else {
+       struct ipset *empty;
+       empty=ipset_new();
+       st->networks=ipset_complement(empty);
+       ipset_free(empty);
+    }
+    l=dict_lookup(dict,"remote-networks");
+    if (l) {
+       st->remote_networks=string_list_to_ipset(l,loc,st->name,
+                                                "remote-networks");
+    } else {
+       struct ipset *empty;
+       empty=ipset_new();
+       st->remote_networks=ipset_complement(empty);
+       ipset_free(empty);
+    }
+
     sa=dict_find_item(dict,"secnet-address",False,"netlink",loc);
     ptpa=dict_find_item(dict,"ptp-address",False,"netlink",loc);
     if (sa && ptpa) {
@@ -828,16 +991,18 @@ netlink_deliver_fn *netlink_init(struct netlink *st,
                 "ptp-address for this netlink device\n");
     }
     if (sa) {
-       st->secnet_address=string_to_ipaddr(sa,"netlink");
+       st->secnet_address=string_item_to_ipaddr(sa,"netlink");
        st->ptp=False;
     } else {
-       st->secnet_address=string_to_ipaddr(ptpa,"netlink");
+       st->secnet_address=string_item_to_ipaddr(ptpa,"netlink");
        st->ptp=True;
     }
+    /* To be strictly correct we could subtract secnet_address from
+       networks here.  It shouldn't make any practical difference,
+       though, and will make the route dump look complicated... */
+    st->subnets=ipset_to_subnet_list(st->networks);
     st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
     buffer_new(&st->icmp,ICMP_BUFSIZE);
-    st->n_routes=0;
-    st->routes=NULL;
     st->outcount=0;
     st->localcount=0;
 
@@ -862,17 +1027,15 @@ struct null {
     struct netlink nl;
 };
 
-static bool_t null_set_route(void *sst, struct netlink_route *route)
+static bool_t null_set_route(void *sst, struct netlink_client *routes)
 {
     struct null *st=sst;
-    string_t t;
-
-    if (route->up!=route->kup) {
-       t=subnet_to_string(&route->net);
-       Message(M_INFO,"%s: setting route %s to state %s\n",st->nl.name,
-               t, route->up?"up":"down");
-       free(t);
-       route->kup=route->up;
+
+    if (routes->up!=routes->kup) {
+       Message(M_INFO,"%s: setting routes for tunnel %s to state %s\n",
+               st->nl.name,routes->name,
+               routes->up?"up":"down");
+       routes->kup=routes->up;
        return True;
     }
     return False;