1 /* User-kernel network link */
3 /* Each netlink device is actually a router, with its own IP address.
4 We do things like decreasing the TTL and recalculating the header
5 checksum, generating ICMP, responding to pings, etc. */
7 /* This is where we have the anti-spoofing paranoia - before sending a
8 packet to the kernel we check that the tunnel it came over could
9 reasonably have produced it. */
11 /* XXX new feature: "point-to-point" mode. Instead of specifying a
12 secnet-address in the configuration dictionary, the user specifies
13 the address of the machine at the other end of the (one and only)
14 tunnel. We bypass all IP packet processing code. This mode is
15 useful for leafnodes like laptops, which don't require a secnet
24 /* Generic IP checksum routine */
25 static inline uint16_t ip_csum(uint8_t *iph,uint32_t count)
27 register uint32_t sum=0;
30 sum+=ntohs(*(uint16_t *)iph);
37 sum=(sum&0xffff)+(sum>>16);
43 * This is a version of ip_compute_csum() optimized for IP headers,
44 * which always checksum on 4 octet boundaries.
46 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
49 static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl) {
52 __asm__ __volatile__("
71 /* Since the input registers which are loaded with iph and ihl
72 are modified, we must also specify them as outputs, or gcc
73 will assume they contain their original values. */
74 : "=r" (sum), "=r" (iph), "=r" (ihl)
75 : "1" (iph), "2" (ihl));
79 static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl)
81 return ip_csum(iph,ihl*4);
86 #if defined (WORDS_BIGENDIAN)
102 /* The options start here. */
125 static void netlink_packet_deliver(struct netlink *st,
126 struct netlink_client *client,
127 struct buffer_if *buf);
/* Start a new ICMP message in the shared st->icmp buffer.
   The buffer is allocated and initialised with st->max_start_pad bytes of
   leading space, then a struct icmphdr is appended and its IP header filled
   in: total length is len plus the IP header length plus 8 (presumably the
   size of the ICMP header - confirm), the source is our secnet address and
   the destination is dest. dest is in host byte order; it is converted with
   htonl() here. The IP header checksum is computed last.
   The caller (see netlink_icmp_simple) sets type/code, appends len bytes of
   payload and calls netlink_icmp_csum(). */
129 static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
130 uint32_t dest,uint16_t len)
134 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
135 buffer_init(&st->icmp,st->max_start_pad);
136 h=buf_append(&st->icmp,sizeof(*h));
141 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
146 h->iph.saddr=htonl(st->secnet_address);
147 h->iph.daddr=htonl(dest);
149 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
156 /* Fill in the ICMP checksum field correctly */
157 static void netlink_icmp_csum(struct icmphdr *h)
161 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
163 h->check=ip_csum(&h->type,len);
167 * An ICMP error message MUST NOT be sent as the result of
170 * * an ICMP error message, or
172 * * a datagram destined to an IP broadcast or IP multicast
175 * * a datagram sent as a link-layer broadcast, or
177 * * a non-initial fragment, or
179 * * a datagram whose source address does not define a single
180 * host -- e.g., a zero address, a loopback address, a
181 * broadcast address, a multicast address, or a Class E
184 static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
189 iph=(struct iphdr *)buf->start;
190 if (iph->protocol==1) return False; /* Overly-broad; we may reply to
191 eg. icmp echo-request */
192 /* How do we spot broadcast destination addresses? */
193 if (ntohs(iph->frag_off)&0x1fff) return False; /* Non-initial fragment */
194 source=ntohl(iph->saddr);
195 if (source==0) return False;
196 if ((source&0xff000000)==0x7f000000) return False;
197 /* How do we spot broadcast source addresses? */
198 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
199 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
203 /* How much of the original IP packet do we include in its ICMP
204 response? The header plus up to 64 bits. */
205 static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
207 struct iphdr *iph=(struct iphdr *)buf->start;
211 /* We include the first 8 bytes of the packet data, provided they exist */
213 plen=ntohs(iph->tot_len);
214 return (hlen>plen?plen:hlen);
217 /* client indicates where the packet we're constructing a response to
218 comes from. NULL indicates the host. */
219 static void netlink_icmp_simple(struct netlink *st, struct buffer_if *buf,
220 struct netlink_client *client,
221 uint8_t type, uint8_t code)
223 struct iphdr *iph=(struct iphdr *)buf->start;
227 if (netlink_icmp_may_reply(buf)) {
228 len=netlink_icmp_reply_len(buf);
229 h=netlink_icmp_tmpl(st,ntohl(iph->saddr),len);
230 h->type=type; h->code=code;
231 memcpy(buf_append(&st->icmp,len),buf->start,len);
232 netlink_icmp_csum(h);
233 netlink_packet_deliver(st,NULL,&st->icmp);
234 BUF_ASSERT_FREE(&st->icmp);
239 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
242 * Is the datagram acceptable?
244 * 1. Length at least the size of an ip header
246 * 3. Checksums correctly.
247 * 4. Doesn't have a bogus length
249 static bool_t netlink_check(struct netlink *st, struct buffer_if *buf)
251 struct iphdr *iph=(struct iphdr *)buf->start;
254 if (iph->ihl < 5 || iph->version != 4) return False;
255 if (buf->size < iph->ihl*4) return False;
256 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) return False;
257 len=ntohs(iph->tot_len);
258 /* There should be no padding */
259 if (buf->size!=len || len<(iph->ihl<<2)) return False;
260 /* XXX check that there's no source route specified */
264 /* Deliver a packet. "client" points to the _origin_ of the packet, not
265 its destination. (May be used when sending ICMP response - avoid
266 asymmetric routing.) */
267 static void netlink_packet_deliver(struct netlink *st,
268 struct netlink_client *client,
269 struct buffer_if *buf)
271 struct iphdr *iph=(struct iphdr *)buf->start;
272 uint32_t dest=ntohl(iph->daddr);
273 uint32_t source=ntohl(iph->saddr);
274 uint32_t best_quality;
278 BUF_ASSERT_USED(buf);
280 if (dest==st->secnet_address) {
/* The format begins with "%s" for the netlink name, as in the other
   messages in this file, so st->name must be passed to match it. */
281 Message(M_ERROR,"%s: trying to deliver a packet to myself!\n",st->name);
286 /* XXX we're going to need an extra value 'allow_route' for the
287 source of the packet. It's always True for packets from the
288 host. For packets from tunnels, we consult the client
289 options. If !allow_route and the destination is a tunnel that
290 also doesn't allow routing, we must reject the packet with an
291 'administratively prohibited' or something similar ICMP. */
293 /* Origin of packet is host or secnet. Might be for a tunnel. */
296 for (i=0; i<st->n_routes; i++) {
297 if (st->routes[i].up && subnet_match(&st->routes[i].net,dest)) {
298 if (st->routes[i].c->link_quality>best_quality
299 || best_quality==0) {
300 best_quality=st->routes[i].c->link_quality;
302 /* If quality isn't perfect we may wish to
303 consider kicking the tunnel with a 0-length
304 packet to prompt it to perform a key setup.
305 Then it'll eventually decide it's up or
307 /* If quality is perfect we don't need to search
309 if (best_quality>=MAXIMUM_LINK_QUALITY) break;
313 if (best_match==-1) {
314 /* Not going down a tunnel. Might be for the host.
315 XXX think about this - only situation should be if we're
317 if (source!=st->secnet_address) {
318 Message(M_ERROR,"netlink_packet_deliver: outgoing packet "
319 "from host that won't fit down any of our tunnels!\n");
320 /* XXX I think this could also occur if a soft tunnel just
321 went down, but still had packets queued in the kernel. */
324 st->deliver_to_host(st->dst,NULL,buf);
325 BUF_ASSERT_FREE(buf);
328 if (best_quality>0) {
329 st->routes[best_match].c->deliver(
330 st->routes[best_match].c->dst,
331 st->routes[best_match].c, buf);
332 BUF_ASSERT_FREE(buf);
334 /* Generate ICMP destination unreachable */
335 netlink_icmp_simple(st,buf,client,3,0); /* client==NULL */
339 } else { /* client is set */
340 /* We know the origin is a tunnel - packet must be for the host */
341 /* XXX THIS IS NOT NECESSARILY TRUE, AND NEEDS FIXING */
342 /* THIS FUNCTION MUST JUST DELIVER THE PACKET: IT MUST ASSUME
343 THE PACKET HAS ALREADY BEEN CHECKED */
344 if (subnet_matches_list(&st->networks,dest)) {
345 st->deliver_to_host(st->dst,NULL,buf);
346 BUF_ASSERT_FREE(buf);
348 Message(M_ERROR,"%s: packet from tunnel %s can't be delivered "
349 "to the host\n",st->name,client->name);
350 netlink_icmp_simple(st,buf,client,3,0);
354 BUF_ASSERT_FREE(buf);
/* Forward a packet that has already passed netlink_check() and is not
   addressed to us. "client" is the origin of the packet (NULL for the
   host), as for netlink_packet_deliver().
   The visible code may answer with ICMP type 11 code 0 (time exceeded),
   recomputes the IP header checksum and hands the packet to
   netlink_packet_deliver(); buf is expected to be free on return.
   NOTE(review): the TTL test/decrement and the clearing of iph->check
   before the checksum is recomputed are presumably between these
   statements - confirm. */
357 static void netlink_packet_forward(struct netlink *st,
358 struct netlink_client *client,
359 struct buffer_if *buf)
361 struct iphdr *iph=(struct iphdr *)buf->start;
363 BUF_ASSERT_USED(buf);
365 /* Packet has already been checked */
367 /* Generate ICMP time exceeded */
368 netlink_icmp_simple(st,buf,client,11,0);
374 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
376 netlink_packet_deliver(st,client,buf);
377 BUF_ASSERT_FREE(buf);
380 /* Deal with packets addressed explicitly to us */
381 static void netlink_packet_local(struct netlink *st,
382 struct netlink_client *client,
383 struct buffer_if *buf)
387 h=(struct icmphdr *)buf->start;
389 if ((ntohs(h->iph.frag_off)&0xbfff)!=0) {
390 Message(M_WARNING,"%s: fragmented packet addressed to secnet; "
391 "ignoring it\n",st->name);
396 if (h->iph.protocol==1) {
398 if (h->type==8 && h->code==0) {
399 /* ICMP echo-request. Special case: we re-use the buffer
400 to construct the reply. */
402 h->iph.daddr=h->iph.saddr;
403 h->iph.saddr=htonl(st->secnet_address);
404 h->iph.ttl=255; /* Be nice and bump it up again... */
406 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
407 netlink_icmp_csum(h);
408 netlink_packet_deliver(st,NULL,buf);
411 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
413 /* Send ICMP protocol unreachable */
414 netlink_icmp_simple(st,buf,client,3,2);
422 /* If cid==NULL packet is from host, otherwise cid specifies which tunnel
424 static void netlink_incoming(void *sst, void *cid, struct buffer_if *buf)
426 struct netlink *st=sst;
427 struct netlink_client *client=cid;
428 uint32_t source,dest;
431 BUF_ASSERT_USED(buf);
432 if (!netlink_check(st,buf)) {
433 Message(M_WARNING,"%s: bad IP packet from %s\n",
434 st->name,client?client->name:"host");
438 iph=(struct iphdr *)buf->start;
440 source=ntohl(iph->saddr);
441 dest=ntohl(iph->daddr);
445 /* Check that the packet source is in 'nets' and its destination is
447 if (!subnet_matches_list(client->networks,source)) {
449 s=ipaddr_to_string(source);
450 d=ipaddr_to_string(dest);
451 Message(M_WARNING,"%s: packet from tunnel %s with bad "
452 "source address (s=%s,d=%s)\n",st->name,client->name,s,d);
458 if (!subnet_matches_list(&st->networks,source)) {
460 s=ipaddr_to_string(source);
461 d=ipaddr_to_string(dest);
462 Message(M_WARNING,"%s: outgoing packet with bad source address "
463 "(s=%s,d=%s)\n",st->name,s,d);
469 /* (st->secnet_address needs checking before matching destination
471 if (dest==st->secnet_address) {
472 netlink_packet_local(st,client,buf);
473 BUF_ASSERT_FREE(buf);
477 /* Check for free routing */
478 if (!subnet_matches_list(&st->networks,dest)) {
480 s=ipaddr_to_string(source);
481 d=ipaddr_to_string(dest);
482 Message(M_WARNING,"%s: incoming packet from tunnel %s "
483 "with bad destination address "
484 "(s=%s,d=%s)\n",st->name,client->name,s,d);
490 netlink_packet_forward(st,client,buf);
491 BUF_ASSERT_FREE(buf);
/* Update the routing-table entries that belong to client c.
   Every route whose owner is c has its quality field set to "quality";
   for routes that are not hard (i.e. soft routes) st->set_route() is then
   called on the entry. Nothing happens if the routing table has not been
   built yet.
   NOTE(review): "up" is presumably stored in the route before set_route()
   is called - that statement is not shown here; confirm. */
494 static void netlink_set_softlinks(struct netlink *st, struct netlink_client *c,
495 bool_t up, uint32_t quality)
499 if (!st->routes) return; /* Table has not yet been created */
500 for (i=0; i<st->n_routes; i++) {
501 if (st->routes[i].c==c) {
502 st->routes[i].quality=quality;
503 if (!st->routes[i].hard) {
505 st->set_route(st->dst,&st->routes[i]);
/* Link-quality callback; installed as st->ops.set_quality by netlink_init().
   sst is the struct netlink, cid the struct netlink_client whose tunnel
   changed. The new quality is recorded on the client and then pushed to
   its routes with netlink_set_softlinks(): marked down (False) when the
   quality is LINK_QUALITY_DOWN, and up (True) otherwise (presumably the
   else branch of the test - confirm). */
511 static void netlink_set_quality(void *sst, void *cid, uint32_t quality)
513 struct netlink *st=sst;
514 struct netlink_client *c=cid;
516 c->link_quality=quality;
517 if (c->link_quality==LINK_QUALITY_DOWN) {
518 netlink_set_softlinks(st,c,False,c->link_quality);
520 netlink_set_softlinks(st,c,True,c->link_quality);
524 static void *netlink_regnets(void *sst, struct subnet_list *nets,
525 netlink_deliver_fn *deliver, void *dst,
526 uint32_t max_start_pad, uint32_t max_end_pad,
527 uint32_t options, string_t client_name)
529 struct netlink *st=sst;
530 struct netlink_client *c;
532 Message(M_DEBUG_CONFIG,"netlink_regnets: request for %d networks, "
533 "max_start_pad=%d, max_end_pad=%d\n",
534 nets->entries,max_start_pad,max_end_pad);
536 if ((options&NETLINK_OPTION_SOFTROUTE) && !st->set_route) {
537 Message(M_ERROR,"%s: this netlink device does not support "
542 if (options&NETLINK_OPTION_SOFTROUTE) {
543 /* XXX for now we assume that soft routes require root privilege;
544 this may not always be true. The device driver can tell us. */
545 require_root_privileges=True;
546 require_root_privileges_explanation="netlink: soft routes";
549 /* Check that nets do not intersect st->exclude_remote_networks;
550 refuse to register if they do. */
551 if (subnet_lists_intersect(&st->exclude_remote_networks,nets)) {
552 Message(M_ERROR,"%s: site %s specifies networks that "
553 "intersect with the explicitly excluded remote networks\n",
554 st->name,client_name);
558 c=safe_malloc(sizeof(*c),"netlink_regnets");
562 c->name=client_name; /* XXX copy it? */
564 c->link_quality=LINK_QUALITY_DOWN;
567 if (max_start_pad > st->max_start_pad) st->max_start_pad=max_start_pad;
568 if (max_end_pad > st->max_end_pad) st->max_end_pad=max_end_pad;
569 st->n_routes+=nets->entries;
574 static void netlink_dump_routes(struct netlink *st, bool_t requested)
580 if (requested) c=M_WARNING;
581 Message(c,"%s: routing table:\n",st->name);
582 for (i=0; i<st->n_routes; i++) {
583 net=subnet_to_string(&st->routes[i].net);
584 Message(c,"%s -> tunnel %s (%s,%s route,%s,quality %d)\n",net,
585 st->routes[i].c->name,
586 st->routes[i].hard?"hard":"soft",
587 st->routes[i].allow_route?"free":"restricted",
588 st->routes[i].up?"up":"down",
589 st->routes[i].quality);
592 Message(c,"%s/32 -> netlink \"%s\"\n",
593 ipaddr_to_string(st->secnet_address),st->name);
594 for (i=0; i<st->networks.entries; i++) {
595 net=subnet_to_string(&st->networks.list[i]);
596 Message(c,"%s -> host\n",net);
/* qsort() comparison callback for struct netlink_route entries; used by
   netlink_phase_hook() to order the routing table.
   Routes are compared on net.len: equal lengths compare equal, and a route
   with the smaller net.len compares greater, so it is placed later in the
   sorted table.
   NOTE(review): the remaining case (a->net.len > b->net.len) presumably
   returns a negative value - confirm. */
601 static int netlink_compare_route_specificity(const void *ap, const void *bp)
603 const struct netlink_route *a=ap;
604 const struct netlink_route *b=bp;
606 if (a->net.len==b->net.len) return 0;
607 if (a->net.len<b->net.len) return 1;
611 static void netlink_phase_hook(void *sst, uint32_t new_phase)
613 struct netlink *st=sst;
614 struct netlink_client *c;
617 /* All the networks serviced by the various tunnels should now
618 * have been registered. We build a routing table by sorting the
619 * routes into most-specific-first order. */
620 st->routes=safe_malloc(st->n_routes*sizeof(*st->routes),
621 "netlink_phase_hook");
624 for (c=st->clients; c; c=c->next) {
625 for (j=0; j<c->networks->entries; j++) {
626 st->routes[i].net=c->networks->list[j];
628 /* Hard routes are always up;
629 soft routes default to down */
630 st->routes[i].up=c->options&NETLINK_OPTION_SOFTROUTE?False:True;
631 st->routes[i].kup=False;
632 st->routes[i].hard=c->options&NETLINK_OPTION_SOFTROUTE?False:True;
633 st->routes[i].allow_route=c->options&NETLINK_OPTION_ALLOW_ROUTE?
635 st->routes[i].quality=c->link_quality;
639 /* ASSERT i==st->n_routes */
640 if (i!=st->n_routes) {
641 fatal("netlink: route count error: expected %d got %d\n",
644 /* Sort the table in descending order of specificity */
645 qsort(st->routes,st->n_routes,sizeof(*st->routes),
646 netlink_compare_route_specificity);
648 netlink_dump_routes(st,False);
/* Signal callback registered for SIGUSR1 by netlink_init().
   sst is the struct netlink. Logs that a dump was requested and then
   prints the routing table via netlink_dump_routes() with requested=True.
   signum is not used by the visible code. */
651 static void netlink_signal_handler(void *sst, int signum)
653 struct netlink *st=sst;
654 Message(M_INFO,"%s: route dump requested by SIGUSR1\n",st->name);
655 netlink_dump_routes(st,True);
658 netlink_deliver_fn *netlink_init(struct netlink *st,
659 void *dst, struct cloc loc,
660 dict_t *dict, string_t description,
661 netlink_route_fn *set_route,
662 netlink_deliver_fn *to_host)
665 st->cl.description=description;
666 st->cl.type=CL_NETLINK;
668 st->cl.interface=&st->ops;
670 st->ops.regnets=netlink_regnets;
671 st->ops.deliver=netlink_incoming;
672 st->ops.set_quality=netlink_set_quality;
676 st->set_route=set_route;
677 st->deliver_to_host=to_host;
679 st->name=dict_read_string(dict,"name",False,"netlink",loc);
680 if (!st->name) st->name=description;
681 dict_read_subnet_list(dict, "networks", True, "netlink", loc,
683 dict_read_subnet_list(dict, "exclude-remote-networks", False, "netlink",
684 loc, &st->exclude_remote_networks);
685 /* secnet-address does not have to be in local-networks;
686 however, it should be advertised in the 'sites' file for the
688 st->secnet_address=string_to_ipaddr(
689 dict_find_item(dict,"secnet-address", True, "netlink", loc),"netlink");
690 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
691 buffer_new(&st->icmp,ICMP_BUFSIZE);
695 add_hook(PHASE_SETUP,netlink_phase_hook,st);
696 request_signal_notification(SIGUSR1, netlink_signal_handler, st);
698 return netlink_incoming;
701 /* No connection to the kernel at all... */
707 static bool_t null_set_route(void *sst, struct netlink_route *route)
712 if (route->up!=route->kup) {
713 t=subnet_to_string(&route->net);
714 Message(M_INFO,"%s: setting route %s to state %s\n",st->nl.name,
715 t, route->up?"up":"down");
717 route->kup=route->up;
723 static void null_deliver(void *sst, void *cid, struct buffer_if *buf)
728 static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
735 st=safe_malloc(sizeof(*st),"null_apply");
737 item=list_elem(args,0);
738 if (!item || item->type!=t_dict)
739 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
741 dict=item->data.dict;
743 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_set_route,
746 return new_closure(&st->nl.cl);
/* Module initialisation: registers the "null-netlink" closure, implemented
   by null_apply(), in the supplied dictionary. */
749 init_module netlink_module;
750 void netlink_module(dict_t *dict)
752 add_closure(dict,"null-netlink",null_apply);