1 /* User-kernel network link */
3 /* Each netlink device is actually a router, with its own IP address.
4 We do things like decreasing the TTL and recalculating the header
5 checksum, generating ICMP, responding to pings, etc. */
7 /* This is where we have the anti-spoofing paranoia - before sending a
8 packet to the kernel we check that the tunnel it came over could
9 reasonably have produced it. */
/*
 * Generic IP checksum routine (the Internet checksum of RFC 1071).
 *
 * iph:   first byte of the data to be summed
 * count: number of bytes to sum
 *
 * Returns the one's complement of the one's-complement sum of the data,
 * in network byte order, so that the result can be stored directly into
 * a header checksum field.  Summing data that already contains a correct
 * checksum yields 0.
 */
static inline uint16_t ip_csum(uint8_t *iph,uint32_t count)
{
    uint32_t sum=0;

    /* Build each big-endian 16-bit word from its two bytes instead of
       dereferencing a cast pointer: the data need not be 2-byte aligned. */
    while (count>1) {
        sum+=((uint32_t)iph[0]<<8)|iph[1];
        iph+=2;
        count-=2;
    }
    /* An odd trailing byte is the high-order octet of a final word that
       is padded with a zero octet. */
    if (count>0)
        sum+=(uint32_t)iph[0]<<8;
    /* Fold the carries back in until the sum fits in 16 bits. */
    while (sum>>16)
        sum=(sum&0xffff)+(sum>>16);
    return htons((uint16_t)~sum);
}
/*
 * This is a version of ip_compute_csum() optimized for IP headers,
 * which always checksum on 4 octet boundaries.
 *
 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
 * Arnt Gulbrandsen.
 */
/* Inline-assembly variant of the IP header checksum.  iph points at the
   header and ihl is its length in 32-bit words (the portable variant of
   this function passes ihl*4 bytes to ip_csum()).  Both inputs are tied
   to output operands ("1" and "2") because the assembly modifies them.
   NOTE(review): the assembly text itself is not present here, so the
   exact instruction sequence cannot be described - confirm against the
   upstream source before relying on this variant. */
42 static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl) {
45 __asm__ __volatile__("
64 /* Since the input registers which are loaded with iph and ipl
65 are modified, we must also specify them as outputs, or gcc
66 will assume they contain their original values. */
67 : "=r" (sum), "=r" (iph), "=r" (ihl)
68 : "1" (iph), "2" (ihl));
/* Portable IP header checksum: ihl counts 32-bit words, whereas
   ip_csum() takes a length in bytes. */
static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl)
{
    const uint32_t header_bytes=ihl*4;

    return ip_csum(iph,header_bytes);
}
79 #if defined (WORDS_BIGENDIAN)
95 /* The options start here. */
/* Forward declaration: netlink_icmp_simple() calls
   netlink_packet_deliver() before its definition appears further down. */
118 static void netlink_packet_deliver(struct netlink *st,
119 struct netlink_client *client,
120 struct buffer_if *buf);
/* Begin building an ICMP message in st->icmp.  The buffer is allocated,
   buffer_init() is given st->max_start_pad, and a struct icmphdr is
   appended whose embedded IP header is then filled in.
   dest is the destination address in host byte order (it is converted
   with htonl); len is the number of bytes the caller will append after
   the header, so tot_len becomes len + the IP header length + 8.  The
   source address is st->secnet_address, and the IP header checksum is
   computed over the header with ip_fast_csum().
   NOTE(review): the 8 is presumably the size of the ICMP header, and the
   function presumably returns h; neither is shown by the lines present
   here - confirm. */
122 static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
123 uint32_t dest,uint16_t len)
127 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
128 buffer_init(&st->icmp,st->max_start_pad);
129 h=buf_append(&st->icmp,sizeof(*h));
134 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
139 h->iph.saddr=htonl(st->secnet_address);
140 h->iph.daddr=htonl(dest);
142 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
149 /* Fill in the ICMP checksum field correctly */
/* The checksum covers everything after the IP header: len is the
   datagram's total length minus the IP header length (ihl counts 32-bit
   words, hence the factor of 4), and the sum starts at the ICMP type
   field. */
150 static void netlink_icmp_csum(struct icmphdr *h)
154 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
156 h->check=ip_csum(&h->type,len);
/*
 * RFC1122:
 * An ICMP error message MUST NOT be sent as the result of
 * receiving:
 *
 * * an ICMP error message, or
 *
 * * a datagram destined to an IP broadcast or IP multicast
 *   address, or
 *
 * * a datagram sent as a link-layer broadcast, or
 *
 * * a non-initial fragment, or
 *
 * * a datagram whose source address does not define a single
 *   host -- e.g., a zero address, a loopback address, a
 *   broadcast address, a multicast address, or a Class E
 *   address.
 */
177 static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
180 struct icmphdr *icmph;
183 iph=(struct iphdr *)buf->start;
184 icmph=(struct icmphdr *)buf->start;
185 if (iph->protocol==1) {
186 switch(icmph->type) {
187 case 3: /* Destination unreachable */
188 case 11: /* Time Exceeded */
189 case 12: /* Parameter Problem */
193 /* How do we spot broadcast destination addresses? */
194 if (ntohs(iph->frag_off)&0x1fff) return False; /* Non-initial fragment */
195 source=ntohl(iph->saddr);
196 if (source==0) return False;
197 if ((source&0xff000000)==0x7f000000) return False;
198 /* How do we spot broadcast source addresses? */
199 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
200 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
204 /* How much of the original IP packet do we include in its ICMP
205 response? The header plus up to 64 bits. */
/* plen is the original datagram's total length taken from its IP header;
   the result is capped at plen so that no more than the packet actually
   contains is quoted.
   NOTE(review): the statements that compute hlen are not present here;
   presumably it is the IP header length plus 8 - confirm. */
206 static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
208 struct iphdr *iph=(struct iphdr *)buf->start;
212 /* We include the first 8 bytes of the packet data, provided they exist */
214 plen=ntohs(iph->tot_len);
215 return (hlen>plen?plen:hlen);
218 /* client indicates where the packet we're constructing a response to
219 comes from. NULL indicates the host. */
/* Send an ICMP message of the given type and code in response to the
   packet in buf, provided netlink_icmp_may_reply() allows it.  The reply
   is addressed to the original packet's source, quotes the first
   netlink_icmp_reply_len() bytes of the original after the ICMP header,
   has its ICMP checksum filled in and is handed to
   netlink_packet_deliver() with a NULL client.  st->icmp must be free
   again afterwards.  buf itself is only read.
   NOTE(review): client is not used by any of the lines present here. */
220 static void netlink_icmp_simple(struct netlink *st, struct buffer_if *buf,
221 struct netlink_client *client,
222 uint8_t type, uint8_t code)
224 struct iphdr *iph=(struct iphdr *)buf->start;
228 if (netlink_icmp_may_reply(buf)) {
229 len=netlink_icmp_reply_len(buf);
230 h=netlink_icmp_tmpl(st,ntohl(iph->saddr),len);
231 h->type=type; h->code=code;
232 memcpy(buf_append(&st->icmp,len),buf->start,len);
233 netlink_icmp_csum(h);
234 netlink_packet_deliver(st,NULL,&st->icmp);
235 BUF_ASSERT_FREE(&st->icmp);
/*
 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
 * checksum.
 *
 * Is the datagram acceptable?
 *
 * 1. Length at least the size of an ip header
 * 2. Version of 4
 * 3. Checksums correctly.
 * 4. Doesn't have a bogus length
 */
250 static bool_t netlink_check(struct netlink *st, struct buffer_if *buf)
252 struct iphdr *iph=(struct iphdr *)buf->start;
255 if (iph->ihl < 5 || iph->version != 4) return False;
256 if (buf->size < iph->ihl*4) return False;
257 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) return False;
258 len=ntohs(iph->tot_len);
259 /* There should be no padding */
260 if (buf->size!=len || len<(iph->ihl<<2)) return False;
261 /* XXX check that there's no source route specified */
265 /* Deliver a packet. "client" points to the _origin_ of the packet, not
266 its destination. (May be used when sending ICMP response - avoid
267 asymmetric routing.) */
268 static void netlink_packet_deliver(struct netlink *st,
269 struct netlink_client *client,
270 struct buffer_if *buf)
272 struct iphdr *iph=(struct iphdr *)buf->start;
273 uint32_t dest=ntohl(iph->daddr);
274 uint32_t source=ntohl(iph->saddr);
275 uint32_t best_quality;
279 BUF_ASSERT_USED(buf);
281 if (dest==st->secnet_address) {
282 Message(M_ERROR,"%s: trying to deliver a packet to myself!\n");
287 /* XXX we're going to need an extra value 'allow_route' for the
288 source of the packet. It's always True for packets from the
289 host. For packets from tunnels, we consult the client
290 options. If !allow_route and the destination is a tunnel that
291 also doesn't allow routing, we must reject the packet with an
292 'administratively prohibited' or something similar ICMP. */
294 /* Origin of packet is host or secnet. Might be for a tunnel. */
297 for (i=0; i<st->n_routes; i++) {
298 if (st->routes[i].up && subnet_match(&st->routes[i].net,dest)) {
299 if (st->routes[i].c->link_quality>best_quality
300 || best_quality==0) {
301 best_quality=st->routes[i].c->link_quality;
303 /* If quality isn't perfect we may wish to
304 consider kicking the tunnel with a 0-length
305 packet to prompt it to perform a key setup.
306 Then it'll eventually decide it's up or
308 /* If quality is perfect we don't need to search
310 if (best_quality>=MAXIMUM_LINK_QUALITY) break;
314 if (best_match==-1) {
315 /* Not going down a tunnel. Might be for the host.
316 XXX think about this - only situation should be if we're
318 if (source!=st->secnet_address) {
319 Message(M_ERROR,"netlink_packet_deliver: outgoing packet "
320 "from host that won't fit down any of our tunnels!\n");
321 /* XXX I think this could also occur if a soft tunnel just
322 went down, but still had packets queued in the kernel. */
325 st->deliver_to_host(st->dst,NULL,buf);
326 BUF_ASSERT_FREE(buf);
329 if (best_quality>0) {
330 st->routes[best_match].c->deliver(
331 st->routes[best_match].c->dst,
332 st->routes[best_match].c, buf);
333 BUF_ASSERT_FREE(buf);
335 /* Generate ICMP destination unreachable */
336 netlink_icmp_simple(st,buf,client,3,0); /* client==NULL */
340 } else { /* client is set */
341 /* We know the origin is a tunnel - packet must be for the host */
342 /* XXX THIS IS NOT NECESSARILY TRUE, AND NEEDS FIXING */
343 /* THIS FUNCTION MUST JUST DELIVER THE PACKET: IT MUST ASSUME
344 THE PACKET HAS ALREADY BEEN CHECKED */
345 if (subnet_matches_list(&st->networks,dest)) {
346 st->deliver_to_host(st->dst,NULL,buf);
347 BUF_ASSERT_FREE(buf);
349 Message(M_ERROR,"%s: packet from tunnel %s can't be delivered "
350 "to the host\n",st->name,client->name);
351 netlink_icmp_simple(st,buf,client,3,0);
355 BUF_ASSERT_FREE(buf);
/* Forward a packet that has already passed the checks.  The lines
   present here either answer with ICMP time exceeded (type 11, code 0)
   or recompute the IP header checksum and pass the packet on to
   netlink_packet_deliver() together with its originating client; the
   buffer must be free afterwards.
   NOTE(review): the condition selecting the time-exceeded branch and any
   TTL update are not present here; presumably the TTL is tested and
   decremented before the checksum is recomputed - confirm. */
358 static void netlink_packet_forward(struct netlink *st,
359 struct netlink_client *client,
360 struct buffer_if *buf)
362 struct iphdr *iph=(struct iphdr *)buf->start;
364 BUF_ASSERT_USED(buf);
366 /* Packet has already been checked */
368 /* Generate ICMP time exceeded */
369 netlink_icmp_simple(st,buf,client,11,0);
375 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
377 netlink_packet_deliver(st,client,buf);
378 BUF_ASSERT_FREE(buf);
381 /* Deal with packets addressed explicitly to us */
/* The packet is viewed through struct icmphdr, which embeds the IP
   header as its iph member.
   - The mask 0xbfff keeps every bit of frag_off except 0x4000, so any
     packet with a fragment offset or another flag bit set is reported as
     fragmented and ignored.
   - Protocol 1 is treated as ICMP.  An echo request (type 8, code 0) is
     answered in place: the addresses are swapped so that the reply comes
     from st->secnet_address, the TTL is reset to 255, both checksums are
     recomputed and the buffer is delivered with a NULL client.  Other
     ICMP messages are logged as unknown.
   - For anything else an ICMP protocol-unreachable (type 3, code 2) is
     sent.
   NOTE(review): the statement that turns the request into a reply type
   and the buffer release on the non-reply paths are not present here. */
382 static void netlink_packet_local(struct netlink *st,
383 struct netlink_client *client,
384 struct buffer_if *buf)
388 h=(struct icmphdr *)buf->start;
390 if ((ntohs(h->iph.frag_off)&0xbfff)!=0) {
391 Message(M_WARNING,"%s: fragmented packet addressed to secnet; "
392 "ignoring it\n",st->name);
397 if (h->iph.protocol==1) {
399 if (h->type==8 && h->code==0) {
400 /* ICMP echo-request. Special case: we re-use the buffer
401 to construct the reply. */
403 h->iph.daddr=h->iph.saddr;
404 h->iph.saddr=htonl(st->secnet_address);
405 h->iph.ttl=255; /* Be nice and bump it up again... */
407 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
408 netlink_icmp_csum(h);
409 netlink_packet_deliver(st,NULL,buf);
412 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
414 /* Send ICMP protocol unreachable */
415 netlink_icmp_simple(st,buf,client,3,2);
/* Entry point for every packet arriving at the netlink device; it is
   installed as st->ops.deliver and is also the value returned by
   netlink_init().  sst is the struct netlink and cid the originating
   struct netlink_client (NULL when the packet comes from the host).
   Visible processing, in order:
   - packets that fail netlink_check() are logged as bad;
   - a packet from a tunnel must have a source address within
     client->networks, and a packet from the host must have a source
     address within st->networks, otherwise a warning is logged;
   - on a point-to-point device the packet is passed straight to the host
     or to the single registered client without further examination;
   - packets addressed to st->secnet_address go to netlink_packet_local();
   - a destination outside st->networks is reported as a bad destination
     address;
   - everything else goes to netlink_packet_forward().
   NOTE(review): the conditions that select between these branches, and
   the buffer releases on the rejecting paths, are not present here -
   confirm against the full source. */
423 /* If cid==NULL packet is from host, otherwise cid specifies which tunnel
425 static void netlink_incoming(void *sst, void *cid, struct buffer_if *buf)
427 struct netlink *st=sst;
428 struct netlink_client *client=cid;
429 uint32_t source,dest;
432 BUF_ASSERT_USED(buf);
433 if (!netlink_check(st,buf)) {
434 Message(M_WARNING,"%s: bad IP packet from %s\n",
435 st->name,client?client->name:"host");
439 iph=(struct iphdr *)buf->start;
441 source=ntohl(iph->saddr);
442 dest=ntohl(iph->daddr);
446 /* Check that the packet source is appropriate for the tunnel
448 if (!subnet_matches_list(client->networks,source)) {
450 s=ipaddr_to_string(source);
451 d=ipaddr_to_string(dest);
452 Message(M_WARNING,"%s: packet from tunnel %s with bad "
453 "source address (s=%s,d=%s)\n",st->name,client->name,s,d);
459 /* Check that the packet originates in our configured local
460 network, and hasn't been forwarded from elsewhere or
461 generated with the wrong source address */
462 if (!subnet_matches_list(&st->networks,source)) {
464 s=ipaddr_to_string(source);
465 d=ipaddr_to_string(dest);
466 Message(M_WARNING,"%s: outgoing packet with bad source address "
467 "(s=%s,d=%s)\n",st->name,s,d);
474 /* If this is a point-to-point device we don't examine the packet at
475 all; we blindly send it down our one-and-only registered tunnel,
476 or to the host, depending on where it came from. */
479 st->deliver_to_host(st->dst,NULL,buf);
481 st->clients->deliver(st->clients->dst,NULL,buf);
483 BUF_ASSERT_FREE(buf);
487 /* (st->secnet_address needs checking before matching destination
489 if (dest==st->secnet_address) {
490 netlink_packet_local(st,client,buf);
491 BUF_ASSERT_FREE(buf);
495 /* Check for free routing */
496 if (!subnet_matches_list(&st->networks,dest)) {
498 s=ipaddr_to_string(source);
499 d=ipaddr_to_string(dest);
500 Message(M_WARNING,"%s: incoming packet from tunnel %s "
501 "with bad destination address "
502 "(s=%s,d=%s)\n",st->name,client->name,s,d);
508 netlink_packet_forward(st,client,buf);
509 BUF_ASSERT_FREE(buf);
/* Update the routing-table entries that belong to client c.  Nothing is
   done before the table exists.  For each matching route the new quality
   is recorded, and for routes that are not hard st->set_route() is
   called with the entry.
   NOTE(review): the use of the up argument is not present here;
   presumably a soft route's up flag is set from it before set_route() is
   called - confirm. */
512 static void netlink_set_softlinks(struct netlink *st, struct netlink_client *c,
513 bool_t up, uint32_t quality)
517 if (!st->routes) return; /* Table has not yet been created */
518 for (i=0; i<st->n_routes; i++) {
519 if (st->routes[i].c==c) {
520 st->routes[i].quality=quality;
521 if (!st->routes[i].hard) {
523 st->set_route(st->dst,&st->routes[i]);
529 static void netlink_set_quality(void *sst, void *cid, uint32_t quality)
531 struct netlink *st=sst;
532 struct netlink_client *c=cid;
534 c->link_quality=quality;
535 if (c->link_quality==LINK_QUALITY_DOWN) {
536 netlink_set_softlinks(st,c,False,c->link_quality);
538 netlink_set_softlinks(st,c,True,c->link_quality);
/* Register a client (site) and the networks reachable through it; this
   function is installed as st->ops.regnets.
   Visible behaviour:
   - the request is logged at M_DEBUG_CONFIG;
   - asking for soft routes on a device without a set_route function is
     reported as an error;
   - soft routes mark the process as requiring root privileges;
   - registration is refused when nets intersects
     st->exclude_remote_networks;
   - a point-to-point device that already has a client is a fatal error;
   - a new struct netlink_client is allocated with its link quality
     starting at LINK_QUALITY_DOWN;
   - st->max_start_pad and st->max_end_pad are raised to the largest
     values requested so far, and st->n_routes grows by nets->entries.
   NOTE(review): the remaining client initialisation, the list insertion
   and the return statements are not present here; presumably the new
   client is returned on success - confirm. */
542 static void *netlink_regnets(void *sst, struct subnet_list *nets,
543 netlink_deliver_fn *deliver, void *dst,
544 uint32_t max_start_pad, uint32_t max_end_pad,
545 uint32_t options, string_t client_name)
547 struct netlink *st=sst;
548 struct netlink_client *c;
550 Message(M_DEBUG_CONFIG,"netlink_regnets: request for %d networks, "
551 "max_start_pad=%d, max_end_pad=%d\n",
552 nets->entries,max_start_pad,max_end_pad);
554 if ((options&NETLINK_OPTION_SOFTROUTE) && !st->set_route) {
555 Message(M_ERROR,"%s: this netlink device does not support "
560 if (options&NETLINK_OPTION_SOFTROUTE) {
561 /* XXX for now we assume that soft routes require root privilege;
562 this may not always be true. The device driver can tell us. */
563 require_root_privileges=True;
564 require_root_privileges_explanation="netlink: soft routes";
567 /* Check that nets do not intersect st->exclude_remote_networks;
568 refuse to register if they do. */
569 if (subnet_lists_intersect(&st->exclude_remote_networks,nets)) {
570 Message(M_ERROR,"%s: site %s specifies networks that "
571 "intersect with the explicitly excluded remote networks\n",
572 st->name,client_name);
576 if (st->clients && st->ptp) {
577 fatal("%s: only one site may use a point-to-point netlink device\n",
582 c=safe_malloc(sizeof(*c),"netlink_regnets");
588 c->link_quality=LINK_QUALITY_DOWN;
591 if (max_start_pad > st->max_start_pad) st->max_start_pad=max_start_pad;
592 if (max_end_pad > st->max_end_pad) st->max_end_pad=max_end_pad;
593 st->n_routes+=nets->entries;
/* Log the routing table: one line per tunnel route (network, client
   name, hard/soft, free/restricted, up/down, quality), one line for the
   device's own address, and one line per host network in st->networks.
   The message class c is M_WARNING when the dump was explicitly
   requested.
   NOTE(review): the class used when requested is False is not present
   here.  Also, the string returned by ipaddr_to_string() is passed
   straight to Message(); if that function returns allocated storage it
   is never released - verify. */
598 static void netlink_dump_routes(struct netlink *st, bool_t requested)
604 if (requested) c=M_WARNING;
605 Message(c,"%s: routing table:\n",st->name);
606 for (i=0; i<st->n_routes; i++) {
607 net=subnet_to_string(&st->routes[i].net);
608 Message(c,"%s -> tunnel %s (%s,%s route,%s,quality %d)\n",net,
609 st->routes[i].c->name,
610 st->routes[i].hard?"hard":"soft",
611 st->routes[i].allow_route?"free":"restricted",
612 st->routes[i].up?"up":"down",
613 st->routes[i].quality);
616 Message(c,"%s/32 -> netlink \"%s\"\n",
617 ipaddr_to_string(st->secnet_address),st->name);
618 for (i=0; i<st->networks.entries; i++) {
619 net=subnet_to_string(&st->networks.list[i]);
620 Message(c,"%s -> host\n",net);
625 static int netlink_compare_route_specificity(const void *ap, const void *bp)
627 const struct netlink_route *a=ap;
628 const struct netlink_route *b=bp;
630 if (a->net.len==b->net.len) return 0;
631 if (a->net.len<b->net.len) return 1;
/* Phase hook (registered for PHASE_SETUP by netlink_init()) that builds
   the routing table once every client has registered its networks.
   - A point-to-point device with no client is a fatal error.
   - st->routes is allocated with st->n_routes entries and filled with
     one entry per network of each client.  A client without
     NETLINK_OPTION_SOFTROUTE gets hard routes that start up; with the
     option its routes are soft and start down.  kup starts False and the
     quality is copied from the client.
   - The number of entries written must equal st->n_routes, otherwise the
     program stops with a route count error.
   - The table is sorted with netlink_compare_route_specificity() and
     then dumped via netlink_dump_routes(st,False).
   NOTE(review): new_phase is not used by any of the lines present here. */
635 static void netlink_phase_hook(void *sst, uint32_t new_phase)
637 struct netlink *st=sst;
638 struct netlink_client *c;
641 if (!st->clients && st->ptp) {
642 /* Point-to-point netlink devices must have precisely one
643 client. If none has registered by now, complain. */
644 fatal("%s: point-to-point netlink devices must have precisely "
645 "one client. This one doesn't have any.\n",st->name);
648 /* All the networks serviced by the various tunnels should now
649 * have been registered. We build a routing table by sorting the
650 * routes into most-specific-first order. */
651 st->routes=safe_malloc(st->n_routes*sizeof(*st->routes),
652 "netlink_phase_hook");
655 for (c=st->clients; c; c=c->next) {
656 for (j=0; j<c->networks->entries; j++) {
657 st->routes[i].net=c->networks->list[j];
659 /* Hard routes are always up;
660 soft routes default to down */
661 st->routes[i].up=c->options&NETLINK_OPTION_SOFTROUTE?False:True;
662 st->routes[i].kup=False;
663 st->routes[i].hard=c->options&NETLINK_OPTION_SOFTROUTE?False:True;
664 st->routes[i].allow_route=c->options&NETLINK_OPTION_ALLOW_ROUTE?
666 st->routes[i].quality=c->link_quality;
670 /* ASSERT i==st->n_routes */
671 if (i!=st->n_routes) {
672 fatal("netlink: route count error: expected %d got %d\n",
675 /* Sort the table in descending order of specificity */
676 qsort(st->routes,st->n_routes,sizeof(*st->routes),
677 netlink_compare_route_specificity);
679 netlink_dump_routes(st,False);
682 static void netlink_signal_handler(void *sst, int signum)
684 struct netlink *st=sst;
685 Message(M_INFO,"%s: route dump requested by SIGUSR1\n",st->name);
686 netlink_dump_routes(st,True);
/* Common initialisation shared by netlink device implementations.
   st is the generic state to fill in, dst the implementation's own
   state (it is later passed back to set_route and to_host), loc and dict
   the configuration location and dictionary, description the closure
   description, set_route the optional function that changes a route's
   state, and to_host the function that delivers packets to the host.
   Visible behaviour:
   - the closure is set up as CL_NETLINK with st->ops as its interface,
     and the ops are regnets, deliver (netlink_incoming) and set_quality;
   - "name" is read from the dictionary and defaults to description;
   - "networks" and "exclude-remote-networks" are read as subnet lists;
   - "secnet-address" and "ptp-address" are looked up; specifying both,
     or neither, is a configuration error, and whichever is given becomes
     st->secnet_address;
   - "mtu" defaults to DEFAULT_MTU and the ICMP scratch buffer is created
     with ICMP_BUFSIZE;
   - netlink_phase_hook is registered for PHASE_SETUP and
     netlink_signal_handler for SIGUSR1.
   Returns netlink_incoming, the function the implementation calls to
   feed packets into the device. */
689 netlink_deliver_fn *netlink_init(struct netlink *st,
690 void *dst, struct cloc loc,
691 dict_t *dict, string_t description,
692 netlink_route_fn *set_route,
693 netlink_deliver_fn *to_host)
698 st->cl.description=description;
699 st->cl.type=CL_NETLINK;
701 st->cl.interface=&st->ops;
703 st->ops.regnets=netlink_regnets;
704 st->ops.deliver=netlink_incoming;
705 st->ops.set_quality=netlink_set_quality;
709 st->set_route=set_route;
710 st->deliver_to_host=to_host;
712 st->name=dict_read_string(dict,"name",False,"netlink",loc);
713 if (!st->name) st->name=description;
714 dict_read_subnet_list(dict, "networks", True, "netlink", loc,
716 dict_read_subnet_list(dict, "exclude-remote-networks", False, "netlink",
717 loc, &st->exclude_remote_networks);
718 /* secnet-address does not have to be in local-networks;
719 however, it should be advertised in the 'sites' file for the
721 sa=dict_find_item(dict,"secnet-address",False,"netlink",loc);
722 ptpa=dict_find_item(dict,"ptp-address", False, "netlink", loc);
724 cfgfatal(loc,st->name,"you may not specify secnet-address and "
725 "ptp-address in the same netlink device\n");
728 cfgfatal(loc,st->name,"you must specify secnet-address or "
729 "ptp-address for this netlink device\n");
732 st->secnet_address=string_to_ipaddr(sa,"netlink");
735 st->secnet_address=string_to_ipaddr(ptpa,"netlink");
738 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
739 buffer_new(&st->icmp,ICMP_BUFSIZE);
743 add_hook(PHASE_SETUP,netlink_phase_hook,st);
744 request_signal_notification(SIGUSR1, netlink_signal_handler, st);
746 return netlink_incoming;
749 /* No connection to the kernel at all... */
/* set_route implementation for the null netlink device.  When the wanted
   state of the route (up) differs from the recorded kernel state (kup),
   the change is only logged and kup is brought into line with up.
   NOTE(review): the declarations, the release of the string t and the
   return value are not present here - confirm. */
755 static bool_t null_set_route(void *sst, struct netlink_route *route)
760 if (route->up!=route->kup) {
761 t=subnet_to_string(&route->net);
762 Message(M_INFO,"%s: setting route %s to state %s\n",st->nl.name,
763 t, route->up?"up":"down");
765 route->kup=route->up;
/* Packet delivery callback of the null netlink device; it takes the same
   arguments as netlink_incoming() (state, client id, buffer).
   NOTE(review): the body is not present here. */
771 static void null_deliver(void *sst, void *cid, struct buffer_if *buf)
/* Closure "apply" function behind the "null-netlink" configuration verb.
   It allocates the device state, requires the first argument to be a
   dictionary (anything else is a configuration error), initialises the
   generic netlink state from that dictionary with null_set_route as the
   route function, and returns the device's closure.
   NOTE(review): the remaining parameters and the final argument given to
   netlink_init() are not present here. */
776 static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
783 st=safe_malloc(sizeof(*st),"null_apply");
785 item=list_elem(args,0);
786 if (!item || item->type!=t_dict)
787 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
789 dict=item->data.dict;
791 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_set_route,
794 return new_closure(&st->nl.cl);
/* Module initialisation: adds the "null-netlink" closure, implemented by
   null_apply(), to the given dictionary. */
797 init_module netlink_module;
798 void netlink_module(dict_t *dict)
800 add_closure(dict,"null-netlink",null_apply);