1 /* User-kernel network link */
3 /* Each netlink device is actually a router, with its own IP address.
4 We do things like decreasing the TTL and recalculating the header
5 checksum, generating ICMP, responding to pings, etc. */
7 /* This is where we have the anti-spoofing paranoia - before sending a
8 packet to the kernel we check that the tunnel it came over could
9 reasonably have produced it. */
/* Generic IP checksum routine (one's-complement sum of 16-bit words).
 *
 * iph:   first byte of the data to checksum.
 * count: number of bytes to cover; an odd trailing byte is treated as
 *        the high-order byte of a final word padded with zero.
 *
 * Returns the complemented, folded sum laid out in network byte order,
 * so the value can be stored directly into a header checksum field.
 * Running it over data that already contains a correct checksum
 * yields 0.
 *
 * The words are assembled a byte at a time rather than by casting iph
 * to uint16_t *, so the routine does not depend on the alignment of
 * iph or on the host byte order. */
static inline uint16_t ip_csum(uint8_t *iph,uint32_t count)
{
    uint32_t sum=0;
    uint16_t folded;
    uint16_t result;
    uint8_t wire[2];

    while (count>1) {
	sum+=((uint32_t)iph[0]<<8)|(uint32_t)iph[1];
	iph+=2;
	count-=2;
    }
    if (count>0)
	sum+=(uint32_t)iph[0]<<8;
    /* Fold the carries back in until the sum fits in 16 bits */
    while (sum>>16)
	sum=(sum&0xffff)+(sum>>16);
    folded=(uint16_t)~sum;
    /* Lay the result out most-significant byte first */
    wire[0]=(uint8_t)(folded>>8);
    wire[1]=(uint8_t)(folded&0xff);
    memcpy(&result,wire,sizeof(result));
    return result;
}
35 * This is a version of ip_compute_csum() optimized for IP headers,
36 * which always checksum on 4 octet boundaries.
38 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
/* Inline-assembly implementation of the IP header checksum.
   iph points at the header and ihl is the header length (the portable
   variant of this function checksums ihl*4 bytes).  Per the operand
   lists below, the asm writes its result to sum and is allowed to
   modify iph and ihl, which are tied to output operands 1 and 2. */
41 static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl) {
44 __asm__ __volatile__("
63 /* Since the input registers which are loaded with iph and ipl
64 are modified, we must also specify them as outputs, or gcc
65 will assume they contain their original values. */
66 : "=r" (sum), "=r" (iph), "=r" (ihl)
67 : "1" (iph), "2" (ihl));
/* Portable variant: checksum an IP header whose length is ihl 32-bit
   words by handing ihl*4 bytes to the generic ip_csum(). */
71 static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl)
73 return ip_csum(iph,ihl*4);
78 #if defined (WORDS_BIGENDIAN)
94 /* The options start here. */
/* Forward declaration: netlink_icmp_simple() calls
   netlink_packet_deliver() before the definition appears. */
117 static void netlink_packet_deliver(struct netlink *st,
118 struct netlink_client *client,
119 struct buffer_if *buf);
/* Start a new ICMP packet in st->icmp and hand back its combined
   IP+ICMP header (h, appended at the front of the buffer).
   dest is the destination address in host byte order; len is the
   number of bytes the caller will append after the header.
   The IP total length is set to len plus the IP header length plus 8
   (presumably the fixed ICMP header size - confirm), the source
   address is st->secnet_address, and the IP header checksum is
   filled in from the finished IP header. */
121 static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
122 uint32_t dest,uint16_t len)
126 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
127 buffer_init(&st->icmp,st->max_start_pad);
128 h=buf_append(&st->icmp,sizeof(*h));
133 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
138 h->iph.saddr=htonl(st->secnet_address);
139 h->iph.daddr=htonl(dest);
141 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
148 /* Fill in the ICMP checksum field correctly */
/* The checksum covers everything after the IP header: the length is
   the IP total length minus 4*ihl, counted from the ICMP type field,
   and the result is stored in h->check. */
149 static void netlink_icmp_csum(struct icmphdr *h)
153 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
155 h->check=ip_csum(&h->type,len);
/*
 * RFC 1122, section 3.2.2:
 *
 * An ICMP error message MUST NOT be sent as the result of
 * receiving:
 *
 * *  an ICMP error message, or
 *
 * *  a datagram destined to an IP broadcast or IP multicast
 *    address, or
 *
 * *  a datagram sent as a link-layer broadcast, or
 *
 * *  a non-initial fragment, or
 *
 * *  a datagram whose source address does not define a single
 *    host -- e.g., a zero address, a loopback address, a
 *    broadcast address, a multicast address, or a Class E
 *    address.
 */
/* Decide whether an ICMP error may be generated in response to the
   packet in buf.  Returns False for any ICMP packet (protocol 1), for
   a non-initial fragment (non-zero fragment offset), and for a source
   address that is zero, in 127.0.0.0/8, multicast (224.0.0.0/4) or
   class E (240.0.0.0/4).  Broadcast addresses are not detected. */
176 static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
181 iph=(struct iphdr *)buf->start;
182 if (iph->protocol==1) return False; /* Overly-broad; we may reply to
183 eg. icmp echo-request */
184 /* How do we spot broadcast destination addresses? */
185 if (ntohs(iph->frag_off)&0x1fff) return False; /* Non-initial fragment */
186 source=ntohl(iph->saddr);
187 if (source==0) return False;
188 if ((source&0xff000000)==0x7f000000) return False;
189 /* How do we spot broadcast source addresses? */
190 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
191 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
195 /* How much of the original IP packet do we include in its ICMP
196 response? The header plus up to 64 bits. */
/* Returns the smaller of hlen and the IP total length of the packet
   in buf, so the quoted data never extends past the end of the
   original packet. */
197 static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
199 struct iphdr *iph=(struct iphdr *)buf->start;
203 /* We include the first 8 bytes of the packet data, provided they exist */
205 plen=ntohs(iph->tot_len);
206 return (hlen>plen?plen:hlen);
209 /* client indicates where the packet we're constructing a response to
210 comes from. NULL indicates the host. */
/* Build and send an ICMP message of the given type and code in reply
   to the packet in buf, provided netlink_icmp_may_reply() allows it.
   The reply is addressed to the source of the original packet, quotes
   its first netlink_icmp_reply_len() bytes, gets its ICMP checksum
   filled in, and is handed to netlink_packet_deliver() with a NULL
   client.  st->icmp is expected to be free again afterwards. */
211 static void netlink_icmp_simple(struct netlink *st, struct buffer_if *buf,
212 struct netlink_client *client,
213 uint8_t type, uint8_t code)
215 struct iphdr *iph=(struct iphdr *)buf->start;
219 if (netlink_icmp_may_reply(buf)) {
220 len=netlink_icmp_reply_len(buf);
221 h=netlink_icmp_tmpl(st,ntohl(iph->saddr),len);
222 h->type=type; h->code=code;
223 memcpy(buf_append(&st->icmp,len),buf->start,len);
224 netlink_icmp_csum(h);
225 netlink_packet_deliver(st,NULL,&st->icmp);
226 BUF_ASSERT_FREE(&st->icmp);
/*
 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
 * checksum.
 *
 * Is the datagram acceptable?
 *
 * 1. Length at least the size of an ip header
 * 2. Version of 4
 * 3. Checksums correctly.
 * 4. Doesn't have a bogus length
 */
/* Sanity-check the IP header at the start of buf.  Returns False if
   the header length field is below 5 words or the version is not 4,
   if the buffer is shorter than the header, if the header checksum
   does not verify to zero, or if the IP total length differs from
   the buffer size or is smaller than the header length. */
241 static bool_t netlink_check(struct netlink *st, struct buffer_if *buf)
243 struct iphdr *iph=(struct iphdr *)buf->start;
246 if (iph->ihl < 5 || iph->version != 4) return False;
247 if (buf->size < iph->ihl*4) return False;
248 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) return False;
249 len=ntohs(iph->tot_len);
250 /* There should be no padding */
251 if (buf->size!=len || len<(iph->ihl<<2)) return False;
252 /* XXX check that there's no source route specified */
256 /* Deliver a packet. "client" points to the _origin_ of the packet, not
257 its destination. (May be used when sending ICMP response - avoid
258 asymmetric routing.) */
/* Route one packet towards its destination.
   st is the netlink device, client the origin of the packet (NULL for
   the host or for secnet itself) and buf the packet, which must be in
   use on entry and is expected to be free on exit.
   A packet addressed to st->secnet_address is reported as an error.
   Without a client, the routes that are up and match the destination
   are searched for the best link quality: the packet goes down that
   tunnel if the quality is non-zero, draws an ICMP destination
   unreachable (type 3, code 0) if it is zero, and when no route
   matches is either reported (source is not st->secnet_address) or
   passed to deliver_to_host.
   With a client, the packet is passed to deliver_to_host when the
   destination lies in st->networks; otherwise the failure is logged
   and an ICMP destination unreachable is generated. */
259 static void netlink_packet_deliver(struct netlink *st,
260 struct netlink_client *client,
261 struct buffer_if *buf)
263 struct iphdr *iph=(struct iphdr *)buf->start;
264 uint32_t dest=ntohl(iph->daddr);
265 uint32_t source=ntohl(iph->saddr);
266 uint32_t best_quality;
270 BUF_ASSERT_USED(buf);
272 if (dest==st->secnet_address) {
273 Message(M_ERROR,"%s: trying to deliver a packet to myself!\n",st->name);
278 /* XXX we're going to need an extra value 'allow_route' for the
279 source of the packet. It's always True for packets from the
280 host. For packets from tunnels, we consult the client
281 options. If !allow_route and the destination is a tunnel that
282 also doesn't allow routing, we must reject the packet with an
283 'administratively prohibited' or something similar ICMP. */
285 /* Origin of packet is host or secnet. Might be for a tunnel. */
288 for (i=0; i<st->n_routes; i++) {
289 if (st->routes[i].up && subnet_match(&st->routes[i].net,dest)) {
290 if (st->routes[i].c->link_quality>best_quality
291 || best_quality==0) {
292 best_quality=st->routes[i].c->link_quality;
294 /* If quality isn't perfect we may wish to
295 consider kicking the tunnel with a 0-length
296 packet to prompt it to perform a key setup.
297 Then it'll eventually decide it's up or
299 /* If quality is perfect we don't need to search
301 if (best_quality>=MAXIMUM_LINK_QUALITY) break;
305 if (best_match==-1) {
306 /* Not going down a tunnel. Might be for the host.
307 XXX think about this - only situation should be if we're
309 if (source!=st->secnet_address) {
310 Message(M_ERROR,"netlink_packet_deliver: outgoing packet "
311 "from host that won't fit down any of our tunnels!\n");
312 /* XXX I think this could also occur if a soft tunnel just
313 went down, but still had packets queued in the kernel. */
316 st->deliver_to_host(st->dst,NULL,buf);
317 BUF_ASSERT_FREE(buf);
320 if (best_quality>0) {
321 st->routes[best_match].c->deliver(
322 st->routes[best_match].c->dst,
323 st->routes[best_match].c, buf);
324 BUF_ASSERT_FREE(buf);
326 /* Generate ICMP destination unreachable */
327 netlink_icmp_simple(st,buf,client,3,0); /* client==NULL */
331 } else { /* client is set */
332 /* We know the origin is a tunnel - packet must be for the host */
333 /* XXX THIS IS NOT NECESSARILY TRUE, AND NEEDS FIXING */
334 /* THIS FUNCTION MUST JUST DELIVER THE PACKET: IT MUST ASSUME
335 THE PACKET HAS ALREADY BEEN CHECKED */
336 if (subnet_matches_list(&st->networks,dest)) {
337 st->deliver_to_host(st->dst,NULL,buf);
338 BUF_ASSERT_FREE(buf);
340 Message(M_ERROR,"%s: packet from tunnel %s can't be delivered "
341 "to the host\n",st->name,client->name);
342 netlink_icmp_simple(st,buf,client,3,0);
346 BUF_ASSERT_FREE(buf);
/* Forward a packet that is not addressed to this router.
   One path generates an ICMP time exceeded (type 11, code 0) -
   presumably when the TTL has run out; the test itself should be
   confirmed against the full body.  Otherwise the IP header checksum
   is recomputed and the packet is passed to netlink_packet_deliver()
   with the original client.  buf must be in use on entry and is
   expected to be free on exit. */
349 static void netlink_packet_forward(struct netlink *st,
350 struct netlink_client *client,
351 struct buffer_if *buf)
353 struct iphdr *iph=(struct iphdr *)buf->start;
355 BUF_ASSERT_USED(buf);
357 /* Packet has already been checked */
359 /* Generate ICMP time exceeded */
360 netlink_icmp_simple(st,buf,client,11,0);
366 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
368 netlink_packet_deliver(st,client,buf);
369 BUF_ASSERT_FREE(buf);
372 /* Deal with packets addressed explicitly to us */
/* Handle a packet whose destination is this router itself.
   A packet with any frag_off bit set other than 0x4000 is logged as
   fragmented and ignored.  An ICMP echo-request (type 8, code 0) is
   answered in the same buffer: the old source becomes the
   destination, the source becomes st->secnet_address, the TTL is set
   to 255, both checksums are recomputed and the buffer is delivered
   with a NULL client.  Other ICMP is logged as unknown; the remaining
   path sends an ICMP protocol unreachable (type 3, code 2). */
373 static void netlink_packet_local(struct netlink *st,
374 struct netlink_client *client,
375 struct buffer_if *buf)
379 h=(struct icmphdr *)buf->start;
381 if ((ntohs(h->iph.frag_off)&0xbfff)!=0) {
382 Message(M_WARNING,"%s: fragmented packet addressed to secnet; "
383 "ignoring it\n",st->name);
388 if (h->iph.protocol==1) {
390 if (h->type==8 && h->code==0) {
391 /* ICMP echo-request. Special case: we re-use the buffer
392 to construct the reply. */
394 h->iph.daddr=h->iph.saddr;
395 h->iph.saddr=htonl(st->secnet_address);
396 h->iph.ttl=255; /* Be nice and bump it up again... */
398 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
399 netlink_icmp_csum(h);
400 netlink_packet_deliver(st,NULL,buf);
403 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
405 /* Send ICMP protocol unreachable */
406 netlink_icmp_simple(st,buf,client,3,2);
/* If cid==NULL packet is from host, otherwise cid specifies which tunnel
   it came from. */
/* Entry point for packets arriving at this netlink device.
   sst is the struct netlink and cid the originating client (NULL
   means the host).  A packet that fails netlink_check() is logged as
   bad.  The source address is checked against the networks of the
   client for tunnel traffic and against st->networks for outgoing
   traffic, and a mismatch is logged.  A packet addressed to
   st->secnet_address goes to netlink_packet_local(); a tunnel packet
   whose destination is outside st->networks can be logged as having
   a bad destination; the remaining path hands the packet to
   netlink_packet_forward().  buf must be in use on entry. */
416 static void netlink_incoming(void *sst, void *cid, struct buffer_if *buf)
418 struct netlink *st=sst;
419 struct netlink_client *client=cid;
420 uint32_t source,dest;
423 BUF_ASSERT_USED(buf);
424 if (!netlink_check(st,buf)) {
425 Message(M_WARNING,"%s: bad IP packet from %s\n",
426 st->name,client?client->name:"host");
430 iph=(struct iphdr *)buf->start;
432 source=ntohl(iph->saddr);
433 dest=ntohl(iph->daddr);
437 /* Check that the packet source is in 'nets' and its destination is
439 if (!subnet_matches_list(client->networks,source)) {
441 s=ipaddr_to_string(source);
442 d=ipaddr_to_string(dest);
443 Message(M_WARNING,"%s: packet from tunnel %s with bad "
444 "source address (s=%s,d=%s)\n",st->name,client->name,s,d);
450 if (!subnet_matches_list(&st->networks,source)) {
452 s=ipaddr_to_string(source);
453 d=ipaddr_to_string(dest);
454 Message(M_WARNING,"%s: outgoing packet with bad source address "
455 "(s=%s,d=%s)\n",st->name,s,d);
461 /* (st->secnet_address needs checking before matching destination
463 if (dest==st->secnet_address) {
464 netlink_packet_local(st,client,buf);
465 BUF_ASSERT_FREE(buf);
469 /* Check for free routing */
470 if (!subnet_matches_list(&st->networks,dest)) {
472 s=ipaddr_to_string(source);
473 d=ipaddr_to_string(dest);
474 Message(M_WARNING,"%s: incoming packet from tunnel %s "
475 "with bad destination address "
476 "(s=%s,d=%s)\n",st->name,client->name,s,d);
482 netlink_packet_forward(st,client,buf);
483 BUF_ASSERT_FREE(buf);
/* Walk the routing table and, for every route that is not hard and
   belongs to client c, pass the route to st->set_route().  Returns
   immediately if the routing table has not been created yet. */
486 static void netlink_set_softlinks(struct netlink *st, struct netlink_client *c,
491 if (!st->routes) return; /* Table has not yet been created */
492 for (i=0; i<st->n_routes; i++) {
493 if (!st->routes[i].hard && st->routes[i].c==c) {
495 st->set_route(st->dst,&st->routes[i]);
/* Record a new link quality for client cid on netlink sst.  A quality
   of LINK_QUALITY_DOWN leads to netlink_set_softlinks() being called
   with False; the other path calls it with True. */
500 static void netlink_set_quality(void *sst, void *cid, uint32_t quality)
502 struct netlink *st=sst;
503 struct netlink_client *c=cid;
505 c->link_quality=quality;
506 if (c->link_quality==LINK_QUALITY_DOWN) {
507 netlink_set_softlinks(st,c,False);
509 netlink_set_softlinks(st,c,True);
/* Register a client and the networks it serves with netlink sst.
   The request is logged.  Asking for NETLINK_OPTION_SOFTROUTE on a
   device without a set_route handler is reported as an error, and
   soft routes mark the process as requiring root privileges.  A nets
   list that intersects st->exclude_remote_networks is reported as an
   error.  A struct netlink_client is then allocated with its name
   set to client_name (not copied) and its link quality set to
   LINK_QUALITY_DOWN, the padding requirements of the device are
   raised to at least those requested, and st->n_routes grows by the
   number of entries in nets.
   NOTE(review): the logging call prints the uint32_t padding values
   with %d; confirm this is acceptable for Message(). */
513 static void *netlink_regnets(void *sst, struct subnet_list *nets,
514 netlink_deliver_fn *deliver, void *dst,
515 uint32_t max_start_pad, uint32_t max_end_pad,
516 uint32_t options, string_t client_name)
518 struct netlink *st=sst;
519 struct netlink_client *c;
521 Message(M_DEBUG_CONFIG,"netlink_regnets: request for %d networks, "
522 "max_start_pad=%d, max_end_pad=%d\n",
523 nets->entries,max_start_pad,max_end_pad);
525 if ((options&NETLINK_OPTION_SOFTROUTE) && !st->set_route) {
526 Message(M_ERROR,"%s: this netlink device does not support "
531 if (options&NETLINK_OPTION_SOFTROUTE) {
532 /* XXX for now we assume that soft routes require root privilege;
533 this may not always be true. The device driver can tell us. */
534 require_root_privileges=True;
535 require_root_privileges_explanation="netlink: soft routes";
538 /* Check that nets do not intersect st->exclude_remote_networks;
539 refuse to register if they do. */
540 if (subnet_lists_intersect(&st->exclude_remote_networks,nets)) {
541 Message(M_ERROR,"%s: site %s specifies networks that "
542 "intersect with the explicitly excluded remote networks\n",
543 st->name,client_name);
547 c=safe_malloc(sizeof(*c),"netlink_regnets");
551 c->name=client_name; /* XXX copy it? */
553 c->link_quality=LINK_QUALITY_DOWN;
556 if (max_start_pad > st->max_start_pad) st->max_start_pad=max_start_pad;
557 if (max_end_pad > st->max_end_pad) st->max_end_pad=max_end_pad;
558 st->n_routes+=nets->entries;
/* Log the routing table at M_INFO: one line per tunnel route (subnet,
   client name, hard/soft, free/restricted, up/down), one line for
   the /32 of st->secnet_address, and one line per entry of
   st->networks marked as going to the host.
   NOTE(review): the result of ipaddr_to_string() is passed straight
   to Message() here, whereas netlink_incoming() keeps such results in
   variables; if the helper allocates its result this call leaks it -
   confirm. */
563 static void netlink_dump_routes(struct netlink *st)
568 Message(M_INFO,"%s: routing table:\n",st->name);
569 for (i=0; i<st->n_routes; i++) {
570 net=subnet_to_string(&st->routes[i].net);
571 Message(M_INFO,"%s -> tunnel %s (%s,%s route,%s)\n",net,
572 st->routes[i].c->name,
573 st->routes[i].hard?"hard":"soft",
574 st->routes[i].allow_route?"free":"restricted",
575 st->routes[i].up?"up":"down");
578 Message(M_INFO,"%s/32 -> netlink \"%s\"\n",
579 ipaddr_to_string(st->secnet_address),st->name);
580 for (i=0; i<st->networks.entries; i++) {
581 net=subnet_to_string(&st->networks.list[i]);
582 Message(M_INFO,"%s -> host\n",net);
/* qsort() comparator for struct netlink_route, keyed on net.len.
   Equal lengths compare equal; when a->net.len is the smaller one the
   result is 1, which places a after b in the sorted table. */
587 static int netlink_compare_route_specificity(const void *ap, const void *bp)
589 const struct netlink_route *a=ap;
590 const struct netlink_route *b=bp;
592 if (a->net.len==b->net.len) return 0;
593 if (a->net.len<b->net.len) return 1;
/* Phase hook that builds the routing table for netlink sst.
   Space for st->n_routes entries is allocated and one entry is
   filled in per network of every client.  Clients with
   NETLINK_OPTION_SOFTROUTE get routes that start down and are not
   hard; all others get routes that start up and are hard; kup starts
   False and allow_route follows NETLINK_OPTION_ALLOW_ROUTE.  A
   mismatch between the number of entries written and st->n_routes is
   fatal.  The table is then sorted with
   netlink_compare_route_specificity() and logged. */
597 static void netlink_phase_hook(void *sst, uint32_t new_phase)
599 struct netlink *st=sst;
600 struct netlink_client *c;
603 /* All the networks serviced by the various tunnels should now
604 * have been registered. We build a routing table by sorting the
605 * routes into most-specific-first order. */
606 st->routes=safe_malloc(st->n_routes*sizeof(*st->routes),
607 "netlink_phase_hook");
610 for (c=st->clients; c; c=c->next) {
611 for (j=0; j<c->networks->entries; j++) {
612 st->routes[i].net=c->networks->list[j];
614 /* Hard routes are always up;
615 soft routes default to down */
616 st->routes[i].up=c->options&NETLINK_OPTION_SOFTROUTE?False:True;
617 st->routes[i].kup=False;
618 st->routes[i].hard=c->options&NETLINK_OPTION_SOFTROUTE?False:True;
619 st->routes[i].allow_route=c->options&NETLINK_OPTION_ALLOW_ROUTE?
624 /* ASSERT i==st->n_routes */
625 if (i!=st->n_routes) {
626 fatal("netlink: route count error: expected %d got %d\n",
629 /* Sort the table in descending order of specificity */
630 qsort(st->routes,st->n_routes,sizeof(*st->routes),
631 netlink_compare_route_specificity);
633 netlink_dump_routes(st);
/* Initialise the netlink instance st on behalf of a device driver.
   Fills in the closure description, type and interface, installs
   netlink_regnets, netlink_incoming and netlink_set_quality in the
   ops table, and stores the set_route and to_host callbacks.  The
   configuration keys name (falling back to description), networks,
   exclude-remote-networks, secnet-address and mtu (default
   DEFAULT_MTU) are read from dict.  The ICMP buffer st->icmp is
   created with ICMP_BUFSIZE, netlink_phase_hook is registered for
   PHASE_SETUP, and netlink_incoming is returned to the caller. */
636 netlink_deliver_fn *netlink_init(struct netlink *st,
637 void *dst, struct cloc loc,
638 dict_t *dict, string_t description,
639 netlink_route_fn *set_route,
640 netlink_deliver_fn *to_host)
643 st->cl.description=description;
644 st->cl.type=CL_NETLINK;
646 st->cl.interface=&st->ops;
648 st->ops.regnets=netlink_regnets;
649 st->ops.deliver=netlink_incoming;
650 st->ops.set_quality=netlink_set_quality;
654 st->set_route=set_route;
655 st->deliver_to_host=to_host;
657 st->name=dict_read_string(dict,"name",False,"netlink",loc);
658 if (!st->name) st->name=description;
659 dict_read_subnet_list(dict, "networks", True, "netlink", loc,
661 dict_read_subnet_list(dict, "exclude-remote-networks", False, "netlink",
662 loc, &st->exclude_remote_networks);
663 /* secnet-address does not have to be in local-networks;
664 however, it should be advertised in the 'sites' file for the
666 st->secnet_address=string_to_ipaddr(
667 dict_find_item(dict,"secnet-address", True, "netlink", loc),"netlink");
668 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
669 buffer_new(&st->icmp,ICMP_BUFSIZE);
673 add_hook(PHASE_SETUP,netlink_phase_hook,st);
675 return netlink_incoming;
678 /* No connection to the kernel at all... */
/* Route handler for the null netlink device.  When the wanted state
   of a route (up) differs from its recorded state (kup) the change is
   logged at M_INFO, and kup is set to match up. */
684 static bool_t null_set_route(void *sst, struct netlink_route *route)
689 if (route->up!=route->kup) {
690 t=subnet_to_string(&route->net);
691 Message(M_INFO,"%s: setting route %s to state %s\n",st->nl.name,
692 t, route->up?"up":"down");
694 route->kup=route->up;
/* Packet delivery callback of the null netlink device.
   NOTE(review): presumably this just discards buf, since the null
   device has no connection to the kernel - confirm against the
   body. */
700 static void null_deliver(void *sst, void *cid, struct buffer_if *buf)
/* Apply function of the null-netlink closure: allocates the device
   state, insists that the first argument is a dictionary (a
   configuration error is fatal otherwise), initialises the embedded
   netlink from that dictionary with null_set_route as route handler,
   and returns a new closure for it. */
705 static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
712 st=safe_malloc(sizeof(*st),"null_apply");
714 item=list_elem(args,0);
715 if (!item || item->type!=t_dict)
716 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
718 dict=item->data.dict;
720 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_set_route,
723 return new_closure(&st->nl.cl);
/* Module initialisation: adds the null-netlink closure, implemented
   by null_apply, to dict. */
726 init_module netlink_module;
727 void netlink_module(dict_t *dict)
729 add_closure(dict,"null-netlink",null_apply);