/* User-kernel network link */
-/* Each netlink device is actually a router, with its own IP address.
- We do things like decreasing the TTL and recalculating the header
- checksum, generating ICMP, responding to pings, etc. */
+/* See RFCs 791, 792, 1123 and 1812 */
-/* This is where we have the anti-spoofing paranoia - before sending a
- packet to the kernel we check that the tunnel it came over could
- reasonably have produced it. */
+/* The netlink device is actually a router. Tunnels are unnumbered
+ point-to-point lines (RFC1812 section 2.2.7); the router has a
+ single address (the 'router-id'). */
+/* This is where we currently have the anti-spoofing paranoia - before
+ sending a packet to the kernel we check that the tunnel it came
+ over could reasonably have produced it. */
+
+
+/* Points to note from RFC1812 (which may require changes in this
+ file):
+
+3.3.4 Maximum Transmission Unit - MTU
+
+ The MTU of each logical interface MUST be configurable within the
+ range of legal MTUs for the interface.
+
+ Many Link Layer protocols define a maximum frame size that may be
+ sent. In such cases, a router MUST NOT allow an MTU to be set which
+ would allow sending of frames larger than those allowed by the Link
+ Layer protocol. However, a router SHOULD be willing to receive a
+ packet as large as the maximum frame size even if that is larger than
+ the MTU.
+
+4.2.1 A router SHOULD count datagrams discarded.
+
+4.2.2.1 Source route options - we probably should implement processing
+of source routes, even though mostly the security policy will prevent
+their use.
+
+5.3.13.4 Source Route Options
+
+ A router MUST implement support for source route options in forwarded
+ packets. A router MAY implement a configuration option that, when
+ enabled, causes all source-routed packets to be discarded. However,
+ such an option MUST NOT be enabled by default.
+
+5.3.13.5 Record Route Option
+
+ Routers MUST support the Record Route option in forwarded packets.
+
+ A router MAY provide a configuration option that, if enabled, will
+ cause the router to ignore (i.e., pass through unchanged) Record
+ Route options in forwarded packets. If provided, such an option MUST
+ default to enabling the record-route. This option should not affect
+ the processing of Record Route options in datagrams received by the
+ router itself (in particular, Record Route options in ICMP echo
+ requests will still be processed according to Section [4.3.3.6]).
+
+5.3.13.6 Timestamp Option
+
+ Routers MUST support the timestamp option in forwarded packets. A
+ timestamp value MUST follow the rules given [INTRO:2].
+
+ If the flags field = 3 (timestamp and prespecified address), the
+ router MUST add its timestamp if the next prespecified address
+ matches any of the router's IP addresses. It is not necessary that
+ the prespecified address be either the address of the interface on
+ which the packet arrived or the address of the interface over which
+ it will be sent.
+
+
+4.2.2.7 Fragmentation: RFC 791 Section 3.2
+
+ Fragmentation, as described in [INTERNET:1], MUST be supported by a
+ router.
+
+4.2.2.8 Reassembly: RFC 791 Section 3.2
+
+ As specified in the corresponding section of [INTRO:2], a router MUST
+ support reassembly of datagrams that it delivers to itself.
+
+4.2.2.9 Time to Live: RFC 791 Section 3.2
+
+ Note in particular that a router MUST NOT check the TTL of a packet
+ except when forwarding it.
+
+ A router MUST NOT discard a datagram just because it was received
+ with TTL equal to zero or one; if it is to the router and otherwise
+ valid, the router MUST attempt to receive it.
+
+ On messages the router originates, the IP layer MUST provide a means
+ for the transport layer to set the TTL field of every datagram that
+ is sent. When a fixed TTL value is used, it MUST be configurable.
+
+
+8.1 The Simple Network Management Protocol - SNMP
+8.1.1 SNMP Protocol Elements
+
+ Routers MUST be manageable by SNMP [MGT:3]. The SNMP MUST operate
+ using UDP/IP as its transport and network protocols.
+
+
+*/
+
+#include <string.h>
+#include <assert.h>
+#include <limits.h>
#include "secnet.h"
#include "util.h"
#include "ipaddr.h"
#include "netlink.h"
#include "process.h"
-#define OPT_SOFTROUTE 1
-#define OPT_ALLOWROUTE 2
+#define ICMP_TYPE_ECHO_REPLY 0
+
+#define ICMP_TYPE_UNREACHABLE 3
+#define ICMP_CODE_NET_UNREACHABLE 0
+#define ICMP_CODE_PROTOCOL_UNREACHABLE 2
+#define ICMP_CODE_FRAGMENTATION_REQUIRED 4
+#define ICMP_CODE_NET_PROHIBITED 13
+
+#define ICMP_TYPE_ECHO_REQUEST 8
+
+#define ICMP_TYPE_TIME_EXCEEDED 11
+#define ICMP_CODE_TTL_EXCEEDED 0
/* Generic IP checksum routine */
-static inline uint16_t ip_csum(uint8_t *iph,uint32_t count)
+static inline uint16_t ip_csum(uint8_t *iph,int32_t count)
{
register uint32_t sum=0;
* By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
* Arnt Gulbrandsen.
*/
-static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl) {
+static inline uint16_t ip_fast_csum(uint8_t *iph, int32_t ihl) {
uint32_t sum; /* operand %0 of the asm below: running sum, then the final result */
- __asm__ __volatile__("
- movl (%1), %0
- subl $4, %2
- jbe 2f
- addl 4(%1), %0
- adcl 8(%1), %0
- adcl 12(%1), %0
-1: adcl 16(%1), %0
- lea 4(%1), %1
- decl %2
- jne 1b
- adcl $0, %0
- movl %0, %2
- shrl $16, %0
- addw %w2, %w0
- adcl $0, %0
- notl %0
-2:
- "
+ __asm__ __volatile__(
+ "movl (%1), %0 ;\n" /* sum = first 32-bit word at iph */
+ "subl $4, %2 ;\n" /* the first four words are handled outside the loop */
+ "jbe 2f ;\n" /* ihl <= 4 (compared as unsigned): jump straight to the end */
+ "addl 4(%1), %0 ;\n"
+ "adcl 8(%1), %0 ;\n"
+ "adcl 12(%1), %0 ;\n"
+"1: adcl 16(%1), %0 ;\n" /* loop body: add-with-carry one further word per pass */
+ "lea 4(%1), %1 ;\n" /* advance iph by one 32-bit word */
+ "decl %2 ;\n"
+ "jne 1b ;\n" /* repeat until the remaining word count reaches zero */
+ "adcl $0, %0 ;\n" /* add in the carry left over from the last addition */
+ "movl %0, %2 ;\n" /* the register that held ihl is reused as scratch from here on */
+ "shrl $16, %0 ;\n"
+ "addw %w2, %w0 ;\n" /* fold the high 16 bits into the low 16 bits */
+ "adcl $0, %0 ;\n"
+ "notl %0 ;\n" /* complement the folded sum */
+"2: ;\n"
/* Since the input registers which are loaded with iph and ihl
are modified, we must also specify them as outputs, or gcc
will assume they contain their original values. */
: "=r" (sum), "=r" (iph), "=r" (ihl)
- : "1" (iph), "2" (ihl));
+ : "1" (iph), "2" (ihl)
+ : "memory");
return sum;
}
#else
-static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl)
+static inline uint16_t ip_fast_csum(uint8_t *iph, int32_t ihl)
{
+ assert(ihl < INT_MAX/4); /* keeps the ihl*4 multiplication below from overflowing */
return ip_csum(iph,ihl*4); /* portable fallback: hand the generic routine ihl scaled by 4 as its count */
}
#endif
struct netlink_client *client,
struct buffer_if *buf);
+/* XXX RFC1812 4.3.2.5:
+ All other ICMP error messages (Destination Unreachable,
+ Redirect, Time Exceeded, and Parameter Problem) SHOULD have their
+ precedence value set to 6 (INTERNETWORK CONTROL) or 7 (NETWORK
+ CONTROL). The IP Precedence value for these error messages MAY be
+ settable.
+ */
static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
uint32_t dest,uint16_t len)
{
h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
h->iph.id=0;
h->iph.frag_off=0;
- h->iph.ttl=255;
+ h->iph.ttl=255; /* XXX should be configurable */
h->iph.protocol=1;
h->iph.saddr=htonl(st->secnet_address);
h->iph.daddr=htonl(dest);
/* Fill in the ICMP checksum field correctly */
static void netlink_icmp_csum(struct icmphdr *h)
{
- uint32_t len;
+ int32_t len;
len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
h->check=0;
/* How much of the original IP packet do we include in its ICMP
response? The IP header plus up to 64 bits (8 bytes) of the data
that follows it. */
+
+/* XXX TODO RFC1812:
+4.3.2.3 Original Message Header
+
+ Historically, every ICMP error message has included the Internet
+ header and at least the first 8 data bytes of the datagram that
+ triggered the error. This is no longer adequate, due to the use of
+ IP-in-IP tunneling and other technologies. Therefore, the ICMP
+ datagram SHOULD contain as much of the original datagram as possible
+ without the length of the ICMP datagram exceeding 576 bytes. The
+ returned IP header (and user data) MUST be identical to that which
+ was received, except that the router is not required to undo any
+ modifications to the IP header that are normally performed in
+ forwarding that were performed before the error was detected (e.g.,
+ decrementing the TTL, or updating options). Note that the
+ requirements of Section [4.3.3.5] supersede this requirement in some
+ cases (i.e., for a Parameter Problem message, if the problem is in a
+ modified field, the router must undo the modification). See Section
+ [4.3.3.5]).
+ */
static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
{
struct iphdr *iph=(struct iphdr *)buf->start;
/*
* RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
* checksum.
+ * RFC1812: 4.2.2.5 MUST discard messages containing invalid checksums.
*
* Is the datagram acceptable?
*
* 3. Checksums correctly.
* 4. Doesn't have a bogus length
*/
-static bool_t netlink_check(struct netlink *st, struct buffer_if *buf)
+static bool_t netlink_check(struct netlink *st, struct buffer_if *buf,
+ char *errmsgbuf, int errmsgbuflen) /* on a False return, errmsgbuf holds a short reason */
{
+#define BAD(...) do{ /* format the reason into errmsgbuf, then fail the check */ \
+ snprintf(errmsgbuf,errmsgbuflen,__VA_ARGS__); \
+ return False; \
+ }while(0)
+
struct iphdr *iph=(struct iphdr *)buf->start; /* the buffer contents are read as an IP header */
- uint32_t len;
+ int32_t len;
- if (iph->ihl < 5 || iph->version != 4) return False;
- if (buf->size < iph->ihl*4) return False;
- if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) return False;
+ if (iph->ihl < 5) BAD("ihl %u",iph->ihl);
+ if (iph->version != 4) BAD("version %u",iph->version);
+ if (buf->size < iph->ihl*4) BAD("size %"PRId32"<%u*4",buf->size,iph->ihl); /* buffer shorter than the header it claims */
+ if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) BAD("csum"); /* checksum over the ihl header words must come out as zero */
len=ntohs(iph->tot_len);
/* There should be no padding: the buffer must be exactly tot_len
bytes long */
- if (buf->size!=len || len<(iph->ihl<<2)) return False;
+ if (buf->size!=len) BAD("len %"PRId32"!=%"PRId32,buf->size,len);
+ if (len<(iph->ihl<<2)) BAD("len %"PRId32"<(%u<<2)",len,iph->ihl); /* NOTE(review): with size>=ihl*4 and size==len already enforced above, this looks unreachable - confirm */
/* XXX check that there's no source route specified */
return True;
+
+#undef BAD
}
/* Deliver a packet. "client" is the _origin_ of the packet, not its
BUF_ASSERT_USED(buf);
if (dest==st->secnet_address) {
- Message(M_ERR,"%s: trying to deliver a packet to myself!\n");
+ Message(M_ERR,"%s: trying to deliver a packet to myself!\n",st->name);
BUF_FREE(buf);
return;
}
string_t s,d;
s=ipaddr_to_string(source);
d=ipaddr_to_string(dest);
- Message(M_ERR,"%s: don't know where to deliver packet "
+ Message(M_DEBUG,"%s: don't know where to deliver packet "
"(s=%s, d=%s)\n", st->name, s, d);
free(s); free(d);
- netlink_icmp_simple(st,buf,client,3,0);
+ netlink_icmp_simple(st,buf,client,ICMP_TYPE_UNREACHABLE,
+ ICMP_CODE_NET_UNREACHABLE);
BUF_FREE(buf);
}
} else {
st->name,s,d);
free(s); free(d);
- netlink_icmp_simple(st,buf,client,3,9);
+ netlink_icmp_simple(st,buf,client,ICMP_TYPE_UNREACHABLE,
+ ICMP_CODE_NET_PROHIBITED);
BUF_FREE(buf);
- }
- if (best_quality>0) {
- /* XXX Fragment if required */
- st->routes[best_match]->deliver(
- st->routes[best_match]->dst, buf);
- st->routes[best_match]->outcount++;
- BUF_ASSERT_FREE(buf);
} else {
- /* Generate ICMP destination unreachable */
- netlink_icmp_simple(st,buf,client,3,0); /* client==NULL */
- BUF_FREE(buf);
+ if (best_quality>0) {
+ /* XXX Fragment if required */
+ st->routes[best_match]->deliver(
+ st->routes[best_match]->dst, buf);
+ st->routes[best_match]->outcount++;
+ BUF_ASSERT_FREE(buf);
+ } else {
+ /* Generate ICMP destination unreachable */
+ netlink_icmp_simple(st,buf,client,ICMP_TYPE_UNREACHABLE,
+ ICMP_CODE_NET_UNREACHABLE); /* client==NULL */
+ BUF_FREE(buf);
+ }
}
}
BUF_ASSERT_FREE(buf);
/* Packet has already been checked */
if (iph->ttl<=1) {
/* Generate ICMP time exceeded */
- netlink_icmp_simple(st,buf,client,11,0);
+ netlink_icmp_simple(st,buf,client,ICMP_TYPE_TIME_EXCEEDED,
+ ICMP_CODE_TTL_EXCEEDED);
BUF_FREE(buf);
return;
}
if (h->iph.protocol==1) {
/* It's ICMP */
- if (h->type==8 && h->code==0) {
+ if (h->type==ICMP_TYPE_ECHO_REQUEST && h->code==0) {
/* ICMP echo-request. Special case: we re-use the buffer
to construct the reply. */
- h->type=0;
+ h->type=ICMP_TYPE_ECHO_REPLY;
h->iph.daddr=h->iph.saddr;
h->iph.saddr=htonl(st->secnet_address);
- h->iph.ttl=255; /* Be nice and bump it up again... */
+ h->iph.ttl=255;
h->iph.check=0;
h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
netlink_icmp_csum(h);
Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
} else {
/* Send ICMP protocol unreachable */
- netlink_icmp_simple(st,buf,client,3,2);
+ netlink_icmp_simple(st,buf,client,ICMP_TYPE_UNREACHABLE,
+ ICMP_CODE_PROTOCOL_UNREACHABLE);
BUF_FREE(buf);
return;
}
{
uint32_t source,dest;
struct iphdr *iph;
+ char errmsgbuf[50];
BUF_ASSERT_USED(buf);
- if (!netlink_check(st,buf)) {
- Message(M_WARNING,"%s: bad IP packet from %s\n",
- st->name,client?client->name:"host");
+ if (!netlink_check(st,buf,errmsgbuf,sizeof(errmsgbuf))) {
+ Message(M_WARNING,"%s: bad IP packet from %s: %s\n",
+ st->name,client?client->name:"host",
+ errmsgbuf);
BUF_FREE(buf);
return;
}
static void netlink_output_subnets(struct netlink *st, uint32_t loglevel,
struct subnet_list *snets)
{
- uint32_t i;
+ int32_t i;
string_t net;
for (i=0; i<snets->entries; i++) {
Message(c,"%s: routing table:\n",st->name);
for (i=0; i<st->n_clients; i++) {
netlink_output_subnets(st,c,st->routes[i]->subnets);
- Message(c,"-> tunnel %s (%s,%s routes,%s,quality %d,use %d)\n",
+ Message(c,"-> tunnel %s (%s,mtu %d,%s routes,%s,"
+ "quality %d,use %d,pri %lu)\n",
st->routes[i]->name,
+ st->routes[i]->up?"up":"down",
+ st->routes[i]->mtu,
st->routes[i]->options&OPT_SOFTROUTE?"soft":"hard",
st->routes[i]->options&OPT_ALLOWROUTE?"free":"restricted",
- st->routes[i]->up?"up":"down",
st->routes[i]->link_quality,
- st->routes[i]->outcount);
+ st->routes[i]->outcount,
+ (unsigned long)st->routes[i]->priority);
}
net=ipaddr_to_string(st->secnet_address);
Message(c,"%s/32 -> netlink \"%s\" (use %d)\n",
{
struct netlink *st=sst;
struct netlink_client *c;
- uint32_t i;
+ int32_t i;
/* All the networks serviced by the various tunnels should now
* have been registered. We build a routing table by sorting the
* clients by priority. */
- st->routes=safe_malloc(st->n_clients*sizeof(*st->routes),
- "netlink_phase_hook");
+ st->routes=safe_malloc_ary(sizeof(*st->routes),st->n_clients,
+ "netlink_phase_hook");
/* Fill the table */
i=0;
- for (c=st->clients; c; c=c->next)
+ for (c=st->clients; c; c=c->next) {
+ assert(i<INT_MAX);
st->routes[i++]=c;
+ }
/* Sort the table in descending order of priority */
qsort(st->routes,st->n_clients,sizeof(*st->routes),
netlink_compare_client_priority);
netlink_dump_routes(st,True);
}
-static void netlink_inst_output_config(void *sst, struct buffer_if *buf)
-{
-/* struct netlink_client *c=sst; */
-/* struct netlink *st=c->nst; */
-
- /* For now we don't output anything */
- BUF_ASSERT_USED(buf);
-}
-
-static bool_t netlink_inst_check_config(void *sst, struct buffer_if *buf)
-{
-/* struct netlink_client *c=sst; */
-/* struct netlink *st=c->nst; */
-
- BUF_ASSERT_USED(buf);
- /* We need to eat all of the configuration information from the buffer
- for backward compatibility. */
- buf->size=0;
- return True;
-}
-
-static void netlink_inst_set_mtu(void *sst, uint32_t new_mtu)
+static void netlink_inst_set_mtu(void *sst, int32_t new_mtu)
{
struct netlink_client *c=sst;
}
static void netlink_inst_reg(void *sst, netlink_deliver_fn *deliver,
- void *dst, uint32_t max_start_pad,
- uint32_t max_end_pad)
+ void *dst, int32_t max_start_pad,
+ int32_t max_end_pad)
{
struct netlink_client *c=sst;
struct netlink *st=c->nst;
struct netlink_client *c;
string_t name;
struct ipset *networks;
- uint32_t options,priority,mtu;
+ uint32_t options,priority;
+ int32_t mtu;
list_t *l;
name=dict_read_string(dict, "name", True, st->name, loc);
c->ops.reg=netlink_inst_reg;
c->ops.deliver=netlink_inst_incoming;
c->ops.set_quality=netlink_set_quality;
- c->ops.output_config=netlink_inst_output_config;
- c->ops.check_config=netlink_inst_check_config;
c->ops.set_mtu=netlink_inst_set_mtu;
c->nst=st;
c->deliver=NULL;
c->dst=NULL;
c->name=name;
- c->link_quality=LINK_QUALITY_DOWN;
+ c->link_quality=LINK_QUALITY_UNUSED;
c->mtu=mtu?mtu:st->mtu;
c->options=options;
c->outcount=0;
c->kup=False;
c->next=st->clients;
st->clients=c;
+ assert(st->n_clients < INT_MAX);
st->n_clients++;
return &c->cl;
netlink_deliver_fn *netlink_init(struct netlink *st,
void *dst, struct cloc loc,
- dict_t *dict, string_t description,
+ dict_t *dict, cstring_t description,
netlink_route_fn *set_routes,
netlink_deliver_fn *to_host)
{
if (l)
st->networks=string_list_to_ipset(l,loc,st->name,"networks");
else {
- Message(M_WARNING,"%s: no local networks (parameter \"networks\") "
- "defined\n",st->name);
- st->networks=ipset_new();
+ struct ipset *empty;
+ empty=ipset_new();
+ st->networks=ipset_complement(empty);
+ ipset_free(empty);
}
l=dict_lookup(dict,"remote-networks");
if (l) {
return new_closure(&st->nl.cl);
}
-init_module netlink_module;
void netlink_module(dict_t *dict)
{
add_closure(dict,"null-netlink",null_apply);