1 /* User-kernel network link */
3 /* Each netlink device is actually a router, with its own IP address.
4 We do things like decreasing the TTL and recalculating the header
5 checksum, generating ICMP, responding to pings, etc. */
7 /* This is where we have the anti-spoofing paranoia - before sending a
8 packet to the kernel we check that the tunnel it came over could
9 reasonably have produced it. */
17 #define OPT_SOFTROUTE 1
18 #define OPT_ALLOWROUTE 2
20 /* Generic IP checksum routine */
21 static inline uint16_t ip_csum(uint8_t *iph,uint32_t count)
23 register uint32_t sum=0;
26 sum+=ntohs(*(uint16_t *)iph);
33 sum=(sum&0xffff)+(sum>>16);
39 * This is a version of ip_compute_csum() optimized for IP headers,
40 * which always checksum on 4 octet boundaries.
42 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
45 static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl) {
48 __asm__ __volatile__("
67 /* Since the input registers which are loaded with iph and ihl
68 are modified, we must also specify them as outputs, or gcc
69 will assume they contain their original values. */
70 : "=r" (sum), "=r" (iph), "=r" (ihl)
71 : "1" (iph), "2" (ihl));
75 static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl)
77 return ip_csum(iph,ihl*4);
82 #if defined (WORDS_BIGENDIAN)
98 /* The options start here. */
121 static void netlink_packet_deliver(struct netlink *st,
122 struct netlink_client *client,
123 struct buffer_if *buf);
125 static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
126 uint32_t dest,uint16_t len)
130 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
131 buffer_init(&st->icmp,st->max_start_pad);
132 h=buf_append(&st->icmp,sizeof(*h));
137 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
142 h->iph.saddr=htonl(st->secnet_address);
143 h->iph.daddr=htonl(dest);
145 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
152 /* Fill in the ICMP checksum field correctly */
153 static void netlink_icmp_csum(struct icmphdr *h)
157 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
159 h->check=ip_csum(&h->type,len);
163 * An ICMP error message MUST NOT be sent as the result of
166 * * an ICMP error message, or
168 * * a datagram destined to an IP broadcast or IP multicast
171 * * a datagram sent as a link-layer broadcast, or
173 * * a non-initial fragment, or
175 * * a datagram whose source address does not define a single
176 * host -- e.g., a zero address, a loopback address, a
177 * broadcast address, a multicast address, or a Class E
180 static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
183 struct icmphdr *icmph;
186 iph=(struct iphdr *)buf->start;
187 icmph=(struct icmphdr *)buf->start;
188 if (iph->protocol==1) {
189 switch(icmph->type) {
190 case 3: /* Destination unreachable */
191 case 11: /* Time Exceeded */
192 case 12: /* Parameter Problem */
196 /* How do we spot broadcast destination addresses? */
197 if (ntohs(iph->frag_off)&0x1fff) return False; /* Non-initial fragment */
198 source=ntohl(iph->saddr);
199 if (source==0) return False;
200 if ((source&0xff000000)==0x7f000000) return False;
201 /* How do we spot broadcast source addresses? */
202 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
203 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
207 /* How much of the original IP packet do we include in its ICMP
208 response? The header plus up to 64 bits. */
209 static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
211 struct iphdr *iph=(struct iphdr *)buf->start;
215 /* We include the first 8 bytes of the packet data, provided they exist */
217 plen=ntohs(iph->tot_len);
218 return (hlen>plen?plen:hlen);
221 /* client indicates where the packet we're constructing a response to
222 comes from. NULL indicates the host. */
223 static void netlink_icmp_simple(struct netlink *st, struct buffer_if *buf,
224 struct netlink_client *client,
225 uint8_t type, uint8_t code)
227 struct iphdr *iph=(struct iphdr *)buf->start;
231 if (netlink_icmp_may_reply(buf)) {
232 len=netlink_icmp_reply_len(buf);
233 h=netlink_icmp_tmpl(st,ntohl(iph->saddr),len);
234 h->type=type; h->code=code;
235 memcpy(buf_append(&st->icmp,len),buf->start,len);
236 netlink_icmp_csum(h);
237 netlink_packet_deliver(st,NULL,&st->icmp);
238 BUF_ASSERT_FREE(&st->icmp);
243 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
246 * Is the datagram acceptable?
248 * 1. Length at least the size of an ip header
250 * 3. Checksums correctly.
251 * 4. Doesn't have a bogus length
253 static bool_t netlink_check(struct netlink *st, struct buffer_if *buf)
255 struct iphdr *iph=(struct iphdr *)buf->start;
258 if (iph->ihl < 5 || iph->version != 4) return False;
259 if (buf->size < iph->ihl*4) return False;
260 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) return False;
261 len=ntohs(iph->tot_len);
262 /* There should be no padding */
263 if (buf->size!=len || len<(iph->ihl<<2)) return False;
264 /* XXX check that there's no source route specified */
268 /* Deliver a packet. "client" is the _origin_ of the packet, not its
/* Route a single IP packet.
   st is the netlink device doing the routing; client is the origin of
   the packet (NULL means it came from the host); buf holds the packet,
   with buf->start pointing at the IP header.
   A packet addressed to st->secnet_address is reported as an error.
   Otherwise st->routes is scanned for routes that are up and match the
   destination, preferring a higher link_quality. With no matching
   route the packet is handed to the host if st->networks contains the
   destination, else ICMP type 3 code 0 is generated. A route without
   OPT_ALLOWROUTE can lead to ICMP type 3 code 9 (the full condition is
   not visible in this listing - NOTE(review): confirm). The buffer is
   asserted free before returning. */
270 static void netlink_packet_deliver(struct netlink *st,
271 struct netlink_client *client,
272 struct buffer_if *buf)
274 struct iphdr *iph=(struct iphdr *)buf->start;
275 uint32_t dest=ntohl(iph->daddr);
276 uint32_t source=ntohl(iph->saddr);
277 uint32_t best_quality;
278 bool_t allow_route=False;
279 bool_t found_allowed=False;
283 BUF_ASSERT_USED(buf);
285 if (dest==st->secnet_address) {
/* The format string starts with %s, so the device name must be passed;
   calling Message() without it is undefined behaviour. */
286 Message(M_ERR,"%s: trying to deliver a packet to myself!\n",st->name);
291 /* Packets from the host (client==NULL) will always be routed. Packets
292 from clients with the allow_route option will also be routed. */
293 if (!client || (client && (client->options & OPT_ALLOWROUTE)))
296 /* If !allow_route, we check the routing table anyway, and if
297 there's a suitable route with OPT_ALLOWROUTE set we use it. If
298 there's a suitable route, but none with OPT_ALLOWROUTE set then
299 we generate ICMP 'communication with destination network
300 administratively prohibited'. */
304 for (i=0; i<st->n_routes; i++) {
305 if (st->routes[i].up && subnet_match(st->routes[i].net,dest)) {
306 /* It's an available route to the correct destination. But is
307 it better than the one we already have? */
309 /* If we have already found an allowed route then we don't
310 bother looking at routes we're not allowed to use. If
311 we don't yet have an allowed route we'll consider any. */
312 if (!allow_route && found_allowed) {
313 if (!(st->routes[i].c->options&OPT_ALLOWROUTE)) continue;
316 if (st->routes[i].c->link_quality>best_quality
317 || best_quality==0) {
318 best_quality=st->routes[i].c->link_quality;
320 if (st->routes[i].c->options&OPT_ALLOWROUTE)
322 /* If quality isn't perfect we may wish to
323 consider kicking the tunnel with a 0-length
324 packet to prompt it to perform a key setup.
325 Then it'll eventually decide it's up or
327 /* If quality is perfect and we're allowed to use the
328 route we don't need to search any more. */
329 if (best_quality>=MAXIMUM_LINK_QUALITY &&
330 (allow_route || found_allowed)) break;
334 if (best_match==-1) {
335 /* The packet's not going down a tunnel. It might (ought to)
337 if (ipset_contains_addr(st->networks,dest)) {
338 st->deliver_to_host(st->dst,buf);
340 BUF_ASSERT_FREE(buf);
343 s=ipaddr_to_string(source);
344 d=ipaddr_to_string(dest);
345 Message(M_ERR,"%s: don't know where to deliver packet "
346 "(s=%s, d=%s)\n", st->name, s, d);
348 netlink_icmp_simple(st,buf,client,3,0);
353 !(st->routes[best_match].c->options&OPT_ALLOWROUTE)) {
355 s=ipaddr_to_string(source);
356 d=ipaddr_to_string(dest);
357 /* We have a usable route but aren't allowed to use it.
358 Generate ICMP destination unreachable: communication
359 with destination network administratively prohibited */
360 Message(M_NOTICE,"%s: denied forwarding for packet (s=%s, d=%s)\n",
364 netlink_icmp_simple(st,buf,client,3,9);
367 if (best_quality>0) {
368 st->routes[best_match].c->deliver(
369 st->routes[best_match].c->dst, buf);
370 st->routes[best_match].outcount++;
371 BUF_ASSERT_FREE(buf);
373 /* Generate ICMP destination unreachable */
374 netlink_icmp_simple(st,buf,client,3,0); /* client==NULL */
378 BUF_ASSERT_FREE(buf);
381 static void netlink_packet_forward(struct netlink *st,
382 struct netlink_client *client,
383 struct buffer_if *buf)
385 struct iphdr *iph=(struct iphdr *)buf->start;
387 BUF_ASSERT_USED(buf);
389 /* Packet has already been checked */
391 /* Generate ICMP time exceeded */
392 netlink_icmp_simple(st,buf,client,11,0);
398 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
400 netlink_packet_deliver(st,client,buf);
401 BUF_ASSERT_FREE(buf);
404 /* Deal with packets addressed explicitly to us */
405 static void netlink_packet_local(struct netlink *st,
406 struct netlink_client *client,
407 struct buffer_if *buf)
413 h=(struct icmphdr *)buf->start;
415 if ((ntohs(h->iph.frag_off)&0xbfff)!=0) {
416 Message(M_WARNING,"%s: fragmented packet addressed to secnet; "
417 "ignoring it\n",st->name);
422 if (h->iph.protocol==1) {
424 if (h->type==8 && h->code==0) {
425 /* ICMP echo-request. Special case: we re-use the buffer
426 to construct the reply. */
428 h->iph.daddr=h->iph.saddr;
429 h->iph.saddr=htonl(st->secnet_address);
430 h->iph.ttl=255; /* Be nice and bump it up again... */
432 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
433 netlink_icmp_csum(h);
434 netlink_packet_deliver(st,NULL,buf);
437 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
439 /* Send ICMP protocol unreachable */
440 netlink_icmp_simple(st,buf,client,3,2);
448 /* If client==NULL packet is from host, otherwise client specifies which tunnel
450 static void netlink_incoming(struct netlink *st, struct netlink_client *client,
451 struct buffer_if *buf)
453 uint32_t source,dest;
456 BUF_ASSERT_USED(buf);
457 if (!netlink_check(st,buf)) {
458 Message(M_WARNING,"%s: bad IP packet from %s\n",
459 st->name,client?client->name:"host");
463 iph=(struct iphdr *)buf->start;
465 source=ntohl(iph->saddr);
466 dest=ntohl(iph->daddr);
469 /* XXX consider generating ICMP if we're not point-to-point and we
470 don't like the packet */
472 /* Check that the packet source is appropriate for the tunnel
474 if (!ipset_contains_addr(client->networks,source)) {
476 s=ipaddr_to_string(source);
477 d=ipaddr_to_string(dest);
478 Message(M_WARNING,"%s: packet from tunnel %s with bad "
479 "source address (s=%s,d=%s)\n",st->name,client->name,s,d);
485 /* Check that the packet originates in our configured local
486 network, and hasn't been forwarded from elsewhere or
487 generated with the wrong source address */
488 if (!ipset_contains_addr(st->networks,source)) {
490 s=ipaddr_to_string(source);
491 d=ipaddr_to_string(dest);
492 Message(M_WARNING,"%s: outgoing packet with bad source address "
493 "(s=%s,d=%s)\n",st->name,s,d);
500 /* If this is a point-to-point device we don't examine the
501 destination address at all; we blindly send it down our
502 one-and-only registered tunnel, or to the host, depending on
503 where it came from. */
504 /* XXX I think we should check destination addresses */
507 st->deliver_to_host(st->dst,buf);
509 st->clients->deliver(st->clients->dst,buf);
511 BUF_ASSERT_FREE(buf);
515 /* (st->secnet_address needs checking before matching destination
517 if (dest==st->secnet_address) {
518 netlink_packet_local(st,client,buf);
519 BUF_ASSERT_FREE(buf);
522 netlink_packet_forward(st,client,buf);
523 BUF_ASSERT_FREE(buf);
/* Delivery entry point for a packet arriving via a netlink client
   (installed as the client's ops.deliver).
   sst is the struct netlink_client the packet arrived on; buf is the
   packet. The owning netlink is found through c->nst and the packet is
   passed to netlink_incoming() with this client as its origin. */
526 static void netlink_inst_incoming(void *sst, struct buffer_if *buf)
528 struct netlink_client *c=sst;
529 struct netlink *st=c->nst;
531 netlink_incoming(st,c,buf);
/* Delivery entry point for a packet coming from the host side (this is
   the function netlink_init() returns to its caller).
   sst is the struct netlink itself; buf is the packet. The packet is
   passed to netlink_incoming() with a NULL client, which marks it as
   originating from the host. */
534 static void netlink_dev_incoming(void *sst, struct buffer_if *buf)
536 struct netlink *st=sst;
538 netlink_incoming(st,NULL,buf);
541 static void netlink_set_softlinks(struct netlink *st, struct netlink_client *c,
542 bool_t up, uint32_t quality)
546 if (!st->routes) return; /* Table has not yet been created */
547 for (i=0; i<st->n_routes; i++) {
548 if (st->routes[i].c==c) {
549 st->routes[i].quality=quality;
550 if (!st->routes[i].hard) {
552 st->set_route(st->dst,&st->routes[i]);
/* Record a new link quality for a client and push it to its routes.
   sst is the struct netlink_client; quality is the new link quality.
   The value is stored in c->link_quality and forwarded through
   netlink_set_softlinks(): with up=False when the quality equals
   LINK_QUALITY_DOWN, with up=True in the other call (presumably the
   else branch - the line separating the two calls is not visible
   here). */
558 static void netlink_set_quality(void *sst, uint32_t quality)
560 struct netlink_client *c=sst;
561 struct netlink *st=c->nst;
563 c->link_quality=quality;
564 if (c->link_quality==LINK_QUALITY_DOWN) {
565 netlink_set_softlinks(st,c,False,c->link_quality);
567 netlink_set_softlinks(st,c,True,c->link_quality);
571 static void netlink_dump_routes(struct netlink *st, bool_t requested)
577 if (requested) c=M_WARNING;
579 net=ipaddr_to_string(st->secnet_address);
580 Message(c,"%s: point-to-point (remote end is %s); routes:\n",
583 for (i=0; i<st->n_routes; i++) {
584 net=subnet_to_string(st->routes[i].net);
585 Message(c,"%s ",net);
590 Message(c,"%s: routing table:\n",st->name);
591 for (i=0; i<st->n_routes; i++) {
592 net=subnet_to_string(st->routes[i].net);
593 Message(c,"%s -> tunnel %s (%s,%s route,%s,quality %d,use %d)\n",net,
594 st->routes[i].c->name,
595 st->routes[i].hard?"hard":"soft",
596 st->routes[i].allow_route?"free":"restricted",
597 st->routes[i].up?"up":"down",
598 st->routes[i].quality,
599 st->routes[i].outcount);
602 net=ipaddr_to_string(st->secnet_address);
603 Message(c,"%s/32 -> netlink \"%s\" (use %d)\n",
604 net,st->name,st->localcount);
606 for (i=0; i<st->subnets->entries; i++) {
607 net=subnet_to_string(st->subnets->list[i]);
608 Message(c,"%s ",net);
612 Message(c,"-> host (use %d)\n",st->outcount);
/* qsort() comparator for struct netlink_route entries, ordering by
   net.len.
   Returns 0 when both routes have the same net.len and 1 when a's
   net.len is smaller than b's, so routes with a larger net.len sort
   earlier. The return value for the remaining case is not visible in
   this listing (presumably negative - TODO confirm). */
616 static int netlink_compare_route_specificity(const void *ap, const void *bp)
618 const struct netlink_route *a=ap;
619 const struct netlink_route *b=bp;
621 if (a->net.len==b->net.len) return 0;
622 if (a->net.len<b->net.len) return 1;
626 static void netlink_phase_hook(void *sst, uint32_t new_phase)
628 struct netlink *st=sst;
629 struct netlink_client *c;
632 /* All the networks serviced by the various tunnels should now
633 * have been registered. We build a routing table by sorting the
634 * routes into most-specific-first order. */
635 st->routes=safe_malloc(st->n_routes*sizeof(*st->routes),
636 "netlink_phase_hook");
639 for (c=st->clients; c; c=c->next) {
640 for (j=0; j<c->subnets->entries; j++) {
641 st->routes[i].net=c->subnets->list[j];
643 /* Hard routes are always up;
644 soft routes default to down; routes with no 'deliver' function
646 st->routes[i].up=c->deliver?
647 (c->options&OPT_SOFTROUTE?False:True):
649 st->routes[i].kup=False;
650 st->routes[i].hard=c->options&OPT_SOFTROUTE?False:True;
651 st->routes[i].allow_route=c->options&OPT_ALLOWROUTE?
653 st->routes[i].quality=c->link_quality;
654 st->routes[i].outcount=0;
658 /* ASSERT i==st->n_routes */
659 if (i!=st->n_routes) {
660 fatal("netlink: route count error: expected %d got %d\n",
663 /* Sort the table in descending order of specificity */
664 qsort(st->routes,st->n_routes,sizeof(*st->routes),
665 netlink_compare_route_specificity);
667 netlink_dump_routes(st,False);
/* Signal callback, registered for SIGUSR1 by netlink_init().
   sst is the struct netlink. Logs that a route dump was requested and
   then calls netlink_dump_routes() with requested=True. */
670 static void netlink_signal_handler(void *sst, int signum)
672 struct netlink *st=sst;
673 Message(M_INFO,"%s: route dump requested by SIGUSR1\n",st->name);
674 netlink_dump_routes(st,True);
677 static void netlink_inst_output_config(void *sst, struct buffer_if *buf)
679 /* struct netlink_client *c=sst; */
680 /* struct netlink *st=c->nst; */
682 /* For now we don't output anything */
683 BUF_ASSERT_USED(buf);
686 static bool_t netlink_inst_check_config(void *sst, struct buffer_if *buf)
688 /* struct netlink_client *c=sst; */
689 /* struct netlink *st=c->nst; */
691 BUF_ASSERT_USED(buf);
692 /* We need to eat all of the configuration information from the buffer
693 for backward compatibility. */
/* Registration callback for a netlink client (installed as ops.reg).
   sst is the struct netlink_client; deliver and dst are the caller's
   delivery function and its context; max_start_pad and max_end_pad are
   the padding the caller asks for.
   The netlink-wide st->max_start_pad and st->max_end_pad are raised so
   that they are at least the requested values. What is done with
   deliver and dst is not visible in this listing. */
698 static void netlink_inst_reg(void *sst, netlink_deliver_fn *deliver,
699 void *dst, uint32_t max_start_pad,
700 uint32_t max_end_pad)
702 struct netlink_client *c=sst;
703 struct netlink *st=c->nst;
705 if (max_start_pad > st->max_start_pad) st->max_start_pad=max_start_pad;
706 if (max_end_pad > st->max_end_pad) st->max_end_pad=max_end_pad;
711 static struct flagstr netlink_option_table[]={
712 { "soft", OPT_SOFTROUTE },
713 { "allow-route", OPT_ALLOWROUTE },
716 /* This is the routine that gets called when the closure that's
717 returned by an invocation of a netlink device closure (eg. tun,
718 userv-ipif) is invoked. It's used to create routes and pass in
719 information about them; the closure it returns is used by site
721 static closure_t *netlink_inst_create(struct netlink *st,
722 struct cloc loc, dict_t *dict)
724 struct netlink_client *c;
726 struct ipset *networks;
730 name=dict_read_string(dict, "name", True, st->name, loc);
732 l=dict_lookup(dict,"routes");
734 cfgfatal(loc,st->name,"required parameter \"routes\" not found\n");
735 networks=string_list_to_ipset(l,loc,st->name,"routes");
736 options=string_list_to_word(dict_lookup(dict,"options"),
737 netlink_option_table,st->name);
739 if ((options&OPT_SOFTROUTE) && !st->set_route) {
740 cfgfatal(loc,st->name,"this netlink device does not support "
745 if (options&OPT_SOFTROUTE) {
746 /* XXX for now we assume that soft routes require root privilege;
747 this may not always be true. The device driver can tell us. */
748 require_root_privileges=True;
749 require_root_privileges_explanation="netlink: soft routes";
751 cfgfatal(loc,st->name,"point-to-point netlinks do not support "
757 /* Check that nets are a subset of st->remote_networks;
758 refuse to register if they are not. */
759 if (!ipset_is_subset(st->remote_networks,networks)) {
760 cfgfatal(loc,st->name,"routes are not allowed\n");
764 c=safe_malloc(sizeof(*c),"netlink_inst_create");
765 c->cl.description=name;
766 c->cl.type=CL_NETLINK;
768 c->cl.interface=&c->ops;
770 c->ops.reg=netlink_inst_reg;
771 c->ops.deliver=netlink_inst_incoming;
772 c->ops.set_quality=netlink_set_quality;
773 c->ops.output_config=netlink_inst_output_config;
774 c->ops.check_config=netlink_inst_check_config;
777 c->networks=networks;
778 c->subnets=ipset_to_subnet_list(networks);
783 c->link_quality=LINK_QUALITY_DOWN;
786 st->n_routes+=c->subnets->entries;
791 static list_t *netlink_inst_apply(closure_t *self, struct cloc loc,
792 dict_t *context, list_t *args)
794 struct netlink *st=self->interface;
800 item=list_elem(args,0);
801 if (!item || item->type!=t_dict) {
802 cfgfatal(loc,st->name,"must have a dictionary argument\n");
804 dict=item->data.dict;
806 cl=netlink_inst_create(st,loc,dict);
808 return new_closure(cl);
811 netlink_deliver_fn *netlink_init(struct netlink *st,
812 void *dst, struct cloc loc,
813 dict_t *dict, string_t description,
814 netlink_route_fn *set_route,
815 netlink_deliver_fn *to_host)
821 st->cl.description=description;
823 st->cl.apply=netlink_inst_apply;
828 st->set_route=set_route;
829 st->deliver_to_host=to_host;
831 st->name=dict_read_string(dict,"name",False,description,loc);
832 if (!st->name) st->name=description;
833 l=dict_lookup(dict,"networks");
835 st->networks=string_list_to_ipset(l,loc,st->name,"networks");
837 Message(M_WARNING,"%s: no local networks (parameter \"networks\") "
838 "defined\n",st->name);
839 st->networks=ipset_new();
841 l=dict_lookup(dict,"remote-networks");
843 st->remote_networks=string_list_to_ipset(l,loc,st->name,
848 st->remote_networks=ipset_complement(empty);
852 sa=dict_find_item(dict,"secnet-address",False,"netlink",loc);
853 ptpa=dict_find_item(dict,"ptp-address",False,"netlink",loc);
855 cfgfatal(loc,st->name,"you may not specify secnet-address and "
856 "ptp-address in the same netlink device\n");
859 cfgfatal(loc,st->name,"you must specify secnet-address or "
860 "ptp-address for this netlink device\n");
863 st->secnet_address=string_item_to_ipaddr(sa,"netlink");
866 st->secnet_address=string_item_to_ipaddr(ptpa,"netlink");
869 /* XXX we may want to subtract secnet_address from networks here, to
870 be strictly correct. It shouldn't make any practical difference,
871 though, and will make the route dump look complicated... */
872 st->subnets=ipset_to_subnet_list(st->networks);
873 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
874 buffer_new(&st->icmp,ICMP_BUFSIZE);
880 add_hook(PHASE_SETUP,netlink_phase_hook,st);
881 request_signal_notification(SIGUSR1, netlink_signal_handler, st);
883 /* If we're point-to-point then we return a CL_NETLINK directly,
884 rather than a CL_NETLINK_OLD or pure closure (depending on
885 compatibility). This CL_NETLINK is for our one and only
886 client. Our cl.apply function is NULL. */
889 cl=netlink_inst_create(st,loc,dict);
892 return netlink_dev_incoming;
895 /* No connection to the kernel at all... */
901 static bool_t null_set_route(void *sst, struct netlink_route *route)
906 if (route->up!=route->kup) {
907 t=subnet_to_string(route->net);
908 Message(M_INFO,"%s: setting route %s to state %s\n",st->nl.name,
909 t, route->up?"up":"down");
911 route->kup=route->up;
917 static void null_deliver(void *sst, struct buffer_if *buf)
922 static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
929 st=safe_malloc(sizeof(*st),"null_apply");
931 item=list_elem(args,0);
932 if (!item || item->type!=t_dict)
933 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
935 dict=item->data.dict;
937 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_set_route,
940 return new_closure(&st->nl.cl);
943 init_module netlink_module;
944 void netlink_module(dict_t *dict)
946 add_closure(dict,"null-netlink",null_apply);