1 /* User-kernel network link */
3 /* Each netlink device is actually a router, with its own IP address.
4 We do things like decreasing the TTL and recalculating the header
5 checksum, generating ICMP, responding to pings, etc. */
7 /* This is where we have the anti-spoofing paranoia - before sending a
8 packet to the kernel we check that the tunnel it came over could
9 reasonably have produced it. */
/* Per-client option flags, kept in the options word of a client and
   produced from the configuration keywords listed in
   netlink_option_table. */
/* Keyword soft: routes of this client are soft rather than hard. */
17 #define OPT_SOFTROUTE 1
/* Keyword allow-route: packets from this client may be routed onward,
   and routes to it may be used by origins that cannot route freely. */
18 #define OPT_ALLOWROUTE 2
20 /* Generic IP checksum routine */
/* Checksum count bytes starting at iph.
   The data is added up as 16-bit words, each converted to host order
   with ntohs(), and anything carried above bit 15 is folded back into
   the low 16 bits.  Callers in this file pass a length in bytes: ihl*4
   for an IP header, and the ICMP message length for ICMP.
   NOTE(review): presumably the complemented sum is returned in network
   byte order, because callers store the result directly in check
   fields and netlink_check() treats a result of 0 as a valid header --
   confirm. */
21 static inline uint16_t ip_csum(uint8_t *iph,uint32_t count)
23 register uint32_t sum=0;
26 sum+=ntohs(*(uint16_t *)iph);
33 sum=(sum&0xffff)+(sum>>16);
39 * This is a version of ip_compute_csum() optimized for IP headers,
40 * which always checksum on 4 octet boundaries.
42 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
/* Inline-assembly implementation of the IP header checksum.
   iph: start of the IP header.
   ihl: header length in 32-bit words (the C variant of this function
        checksums ihl*4 bytes).
   The assembly modifies both arguments, which is why they appear in
   the output operand list as well as the input list. */
45 static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl) {
48 __asm__ __volatile__("
67 /* Since the input registers which are loaded with iph and ipl
68 are modified, we must also specify them as outputs, or gcc
69 will assume they contain their original values. */
70 : "=r" (sum), "=r" (iph), "=r" (ihl)
71 : "1" (iph), "2" (ihl));
/* C variant of ip_fast_csum: checksums an IP header of ihl 32-bit
   words by handing ihl*4 bytes to the generic ip_csum(). */
75 static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl)
77 return ip_csum(iph,ihl*4);
82 #if defined (WORDS_BIGENDIAN)
98 /* The options start here. */
/* Forward declaration: netlink_icmp_simple() delivers the ICMP
   messages it builds through this function, ahead of its definition. */
121 static void netlink_packet_deliver(struct netlink *st,
122 struct netlink_client *client,
123 struct buffer_if *buf);
/* Start a new ICMP message in st->icmp and return its header.
   The buffer is allocated, buffer_init() is called with
   st->max_start_pad, and room for one struct icmphdr is appended.
   dest: destination address in host byte order (htonl is applied).
   len:  number of payload bytes the caller is going to append; the IP
         total length is set to len plus the IP header length plus 8.
   The source address is st->secnet_address and the IP header checksum
   is filled in here.  The caller, see netlink_icmp_simple(), still
   sets the ICMP type and code, appends the payload and calls
   netlink_icmp_csum(). */
125 static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
126 uint32_t dest,uint16_t len)
130 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
131 buffer_init(&st->icmp,st->max_start_pad);
132 h=buf_append(&st->icmp,sizeof(*h));
137 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
142 h->iph.saddr=htonl(st->secnet_address);
143 h->iph.daddr=htonl(dest);
145 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
152 /* Fill in the ICMP checksum field correctly */
/* h points at a complete message: an IP header followed by ICMP.
   The checksum covers everything after the IP header, that is tot_len
   minus the IP header length, starting at the ICMP type field, and the
   result is stored in h->check. */
153 static void netlink_icmp_csum(struct icmphdr *h)
157 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
159 h->check=ip_csum(&h->type,len);
163 * An ICMP error message MUST NOT be sent as the result of
166 * * an ICMP error message, or
168 * * a datagram destined to an IP broadcast or IP multicast
171 * * a datagram sent as a link-layer broadcast, or
173 * * a non-initial fragment, or
175 * * a datagram whose source address does not define a single
176 * host -- e.g., a zero address, a loopback address, a
177 * broadcast address, a multicast address, or a Class E
/* Decide whether the packet in buf may be answered with an ICMP error
   message.  The checks are:
     - ICMP packets (protocol 1) of type 3, 11 or 12, the ICMP error
       types, are singled out;
     - non-initial fragments (non-zero fragment offset) are refused;
     - source address 0, 127.0.0.0/8, 224.0.0.0/4 (multicast) and
       240.0.0.0/4 (class E) are refused.
   Broadcast source and destination addresses are not detected yet.
   iph and icmph both point at buf->start: struct icmphdr is accessed
   as h->iph elsewhere in this file, so it begins with the IP header. */
180 static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
183 struct icmphdr *icmph;
186 iph=(struct iphdr *)buf->start;
187 icmph=(struct icmphdr *)buf->start;
188 if (iph->protocol==1) {
189 switch(icmph->type) {
190 case 3: /* Destination unreachable */
191 case 11: /* Time Exceeded */
192 case 12: /* Parameter Problem */
196 /* How do we spot broadcast destination addresses? */
197 if (ntohs(iph->frag_off)&0x1fff) return False; /* Non-initial fragment */
198 source=ntohl(iph->saddr);
199 if (source==0) return False;
200 if ((source&0xff000000)==0x7f000000) return False;
201 /* How do we spot broadcast source addresses? */
202 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
203 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
207 /* How much of the original IP packet do we include in its ICMP
208 response? The header plus up to 64 bits. */
/* Return the number of bytes of the offending packet to quote in an
   ICMP error: the wanted length hlen, limited to the total length
   recorded in the IP header of the packet. */
209 static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
211 struct iphdr *iph=(struct iphdr *)buf->start;
215 /* We include the first 8 bytes of the packet data, provided they exist */
217 plen=ntohs(iph->tot_len);
218 return (hlen>plen?plen:hlen);
221 /* client indicates where the packet we're constructing a response to
222 comes from. NULL indicates the host. */
/* Build and send an ICMP message of the given type and code in
   response to the packet in buf, provided netlink_icmp_may_reply()
   permits it.  The reply is addressed to the source of the offending
   packet, quotes its first netlink_icmp_reply_len() bytes and is built
   in st->icmp.  It is handed to netlink_packet_deliver() with a NULL
   client, so it is routed like a packet from the host, and st->icmp is
   expected to be free again afterwards. */
223 static void netlink_icmp_simple(struct netlink *st, struct buffer_if *buf,
224 struct netlink_client *client,
225 uint8_t type, uint8_t code)
227 struct iphdr *iph=(struct iphdr *)buf->start;
231 if (netlink_icmp_may_reply(buf)) {
232 len=netlink_icmp_reply_len(buf);
233 h=netlink_icmp_tmpl(st,ntohl(iph->saddr),len);
234 h->type=type; h->code=code;
235 memcpy(buf_append(&st->icmp,len),buf->start,len);
236 netlink_icmp_csum(h);
237 netlink_packet_deliver(st,NULL,&st->icmp);
238 BUF_ASSERT_FREE(&st->icmp);
243 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
246 * Is the datagram acceptable?
248 * 1. Length at least the size of an ip header
250 * 3. Checksums correctly.
251 * 4. Doesn't have a bogus length
/* Sanity-check the IP packet in buf; False means it must be discarded.
   A packet is rejected when
     - the header length field is below 5 words or the version is not 4;
     - the buffer is shorter than the header length claims;
     - the header checksum does not verify (a valid header gives 0);
     - the total length field differs from the buffer size, or is
       smaller than the header length. */
253 static bool_t netlink_check(struct netlink *st, struct buffer_if *buf)
255 struct iphdr *iph=(struct iphdr *)buf->start;
258 if (iph->ihl < 5 || iph->version != 4) return False;
259 if (buf->size < iph->ihl*4) return False;
260 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) return False;
261 len=ntohs(iph->tot_len);
262 /* There should be no padding */
263 if (buf->size!=len || len<(iph->ihl<<2)) return False;
264 /* XXX check that there's no source route specified */
/* Route a packet and hand it to its next hop.
   st:     the netlink instance.
   client: where the packet came from; NULL means the host.
   buf:    the packet; must be in use on entry.
   Outline:
     - a packet addressed to st->secnet_address is reported as an error;
     - routes that are up and match the destination are scanned and the
       one with the best link quality is remembered; when the origin
       may not route freely, routes carrying OPT_ALLOWROUTE take
       precedence once one has been found;
     - with no matching route the packet goes to the host if the
       destination is in st->networks, otherwise it is logged and ICMP
       destination unreachable (3,0) is generated;
     - a matching route that the origin is not allowed to use yields
       ICMP 3,9 (administratively prohibited);
     - otherwise the packet is delivered down the chosen tunnel if its
       quality is above zero, else ICMP 3,0 is generated. */
268 /* Deliver a packet. "client" is the _origin_ of the packet, not its
270 static void netlink_packet_deliver(struct netlink *st,
271 struct netlink_client *client,
272 struct buffer_if *buf)
274 struct iphdr *iph=(struct iphdr *)buf->start;
275 uint32_t dest=ntohl(iph->daddr);
276 uint32_t source=ntohl(iph->saddr);
277 uint32_t best_quality;
278 bool_t allow_route=False;
279 bool_t found_allowed=False;
283 BUF_ASSERT_USED(buf);
285 if (dest==st->secnet_address) {
286 Message(M_ERR,"%s: trying to deliver a packet to myself!\n",st->name);
291 /* Packets from the host (client==NULL) will always be routed. Packets
292 from clients with the allow_route option will also be routed. */
293 if (!client || (client && (client->options & OPT_ALLOWROUTE)))
296 /* If !allow_route, we check the routing table anyway, and if
297 there's a suitable route with OPT_ALLOWROUTE set we use it. If
298 there's a suitable route, but none with OPT_ALLOWROUTE set then
299 we generate ICMP 'communication with destination network
300 administratively prohibited'. */
304 for (i=0; i<st->n_routes; i++) {
305 if (st->routes[i].up && subnet_match(&st->routes[i].net,dest)) {
306 /* It's an available route to the correct destination. But is
307 it better than the one we already have? */
309 /* If we have already found an allowed route then we don't
310 bother looking at routes we're not allowed to use. If
311 we don't yet have an allowed route we'll consider any. */
312 if (!allow_route && found_allowed) {
313 if (!(st->routes[i].c->options&OPT_ALLOWROUTE)) continue;
316 if (st->routes[i].c->link_quality>best_quality
317 || best_quality==0) {
318 best_quality=st->routes[i].c->link_quality;
320 if (st->routes[i].c->options&OPT_ALLOWROUTE)
322 /* If quality isn't perfect we may wish to
323 consider kicking the tunnel with a 0-length
324 packet to prompt it to perform a key setup.
325 Then it'll eventually decide it's up or
327 /* If quality is perfect and we're allowed to use the
328 route we don't need to search any more. */
329 if (best_quality>=MAXIMUM_LINK_QUALITY &&
330 (allow_route || found_allowed)) break;
334 if (best_match==-1) {
335 /* The packet's not going down a tunnel. It might (ought to)
337 if (subnet_matches_list(&st->networks,dest)) {
338 st->deliver_to_host(st->dst,buf);
340 BUF_ASSERT_FREE(buf);
343 s=ipaddr_to_string(source);
344 d=ipaddr_to_string(dest);
345 Message(M_ERR,"%s: don't know where to deliver packet "
346 "(s=%s, d=%s)\n", st->name, s, d);
348 netlink_icmp_simple(st,buf,client,3,0);
353 !(st->routes[best_match].c->options&OPT_ALLOWROUTE)) {
355 s=ipaddr_to_string(source);
356 d=ipaddr_to_string(dest);
357 /* We have a usable route but aren't allowed to use it.
358 Generate ICMP destination unreachable: communication
359 with destination network administratively prohibited */
360 Message(M_NOTICE,"%s: denied forwarding for packet (s=%s, d=%s)\n",
364 netlink_icmp_simple(st,buf,client,3,9);
367 if (best_quality>0) {
368 st->routes[best_match].c->deliver(
369 st->routes[best_match].c->dst, buf);
370 st->routes[best_match].outcount++;
371 BUF_ASSERT_FREE(buf);
373 /* Generate ICMP destination unreachable */
374 netlink_icmp_simple(st,buf,client,3,0); /* client==NULL */
378 BUF_ASSERT_FREE(buf);
/* Forward a packet that is not addressed to the netlink itself.  The
   packet has already been validated.  Either an ICMP time exceeded
   (11,0) message is generated, or the IP header checksum is recomputed
   and the packet is passed to netlink_packet_deliver() together with
   its originating client.  buf is free on return.
   NOTE(review): the time-exceeded case and the checksum update are
   presumably tied to the TTL decrement described at the top of this
   file -- confirm. */
381 static void netlink_packet_forward(struct netlink *st,
382 struct netlink_client *client,
383 struct buffer_if *buf)
385 struct iphdr *iph=(struct iphdr *)buf->start;
387 BUF_ASSERT_USED(buf);
389 /* Packet has already been checked */
391 /* Generate ICMP time exceeded */
392 netlink_icmp_simple(st,buf,client,11,0);
398 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
400 netlink_packet_deliver(st,client,buf);
401 BUF_ASSERT_FREE(buf);
404 /* Deal with packets addressed explicitly to us */
/* Handle a packet whose destination is the netlink itself.
     - A packet with any fragment-field bit other than DF set (mask
       0xbfff: more-fragments, fragment offset, reserved bit) is logged
       and ignored.
     - An ICMP echo request (type 8, code 0) is answered in place: the
       buffer is reused, the destination becomes the old source, the
       source becomes st->secnet_address, the TTL is reset to 255, both
       checksums are recomputed and the reply is delivered with a NULL
       client.
     - Other ICMP is logged as unknown.
     - Any other protocol gets ICMP protocol unreachable (3,2). */
405 static void netlink_packet_local(struct netlink *st,
406 struct netlink_client *client,
407 struct buffer_if *buf)
413 h=(struct icmphdr *)buf->start;
415 if ((ntohs(h->iph.frag_off)&0xbfff)!=0) {
416 Message(M_WARNING,"%s: fragmented packet addressed to secnet; "
417 "ignoring it\n",st->name);
422 if (h->iph.protocol==1) {
424 if (h->type==8 && h->code==0) {
425 /* ICMP echo-request. Special case: we re-use the buffer
426 to construct the reply. */
428 h->iph.daddr=h->iph.saddr;
429 h->iph.saddr=htonl(st->secnet_address);
430 h->iph.ttl=255; /* Be nice and bump it up again... */
432 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
433 netlink_icmp_csum(h);
434 netlink_packet_deliver(st,NULL,buf);
437 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
439 /* Send ICMP protocol unreachable */
440 netlink_icmp_simple(st,buf,client,3,2);
/* Entry point for every packet arriving at the netlink, whether from a
   tunnel or from the host.  Steps:
     - the packet must pass netlink_check(), else it is logged as bad;
     - the source address must be in the networks of the sending client
       (tunnel) or in st->networks (host), else a warning is logged;
     - on a point-to-point device the packet is passed straight to the
       host or to the single registered client without examination;
     - a packet for st->secnet_address goes to netlink_packet_local();
     - a destination outside st->networks is logged as a bad
       destination for the tunnel the packet came from;
     - anything else goes to netlink_packet_forward().
   buf must be in use on entry and is free once handling is done. */
448 /* If client==NULL packet is from host, otherwise client specifies which tunnel
450 static void netlink_incoming(struct netlink *st, struct netlink_client *client,
451 struct buffer_if *buf)
453 uint32_t source,dest;
456 BUF_ASSERT_USED(buf);
457 if (!netlink_check(st,buf)) {
458 Message(M_WARNING,"%s: bad IP packet from %s\n",
459 st->name,client?client->name:"host");
463 iph=(struct iphdr *)buf->start;
465 source=ntohl(iph->saddr);
466 dest=ntohl(iph->daddr);
470 /* Check that the packet source is appropriate for the tunnel
472 if (!subnet_matches_list(&client->networks,source)) {
474 s=ipaddr_to_string(source);
475 d=ipaddr_to_string(dest);
476 Message(M_WARNING,"%s: packet from tunnel %s with bad "
477 "source address (s=%s,d=%s)\n",st->name,client->name,s,d);
483 /* Check that the packet originates in our configured local
484 network, and hasn't been forwarded from elsewhere or
485 generated with the wrong source address */
486 if (!subnet_matches_list(&st->networks,source)) {
488 s=ipaddr_to_string(source);
489 d=ipaddr_to_string(dest);
490 Message(M_WARNING,"%s: outgoing packet with bad source address "
491 "(s=%s,d=%s)\n",st->name,s,d);
498 /* If this is a point-to-point device we don't examine the packet at
499 all; we blindly send it down our one-and-only registered tunnel,
500 or to the host, depending on where it came from. */
503 st->deliver_to_host(st->dst,buf);
505 st->clients->deliver(st->clients->dst,buf);
507 BUF_ASSERT_FREE(buf);
511 /* (st->secnet_address needs checking before matching destination
513 if (dest==st->secnet_address) {
514 netlink_packet_local(st,client,buf);
515 BUF_ASSERT_FREE(buf);
519 /* Check for free routing */
520 if (!subnet_matches_list(&st->networks,dest)) {
522 s=ipaddr_to_string(source);
523 d=ipaddr_to_string(dest);
524 Message(M_WARNING,"%s: incoming packet from tunnel %s "
525 "with bad destination address "
526 "(s=%s,d=%s)\n",st->name,client->name,s,d);
532 netlink_packet_forward(st,client,buf);
533 BUF_ASSERT_FREE(buf);
/* Deliver callback offered to tunnel clients (installed as
   c->ops.deliver by netlink_inst_create): sst is the struct
   netlink_client the packet arrived from. */
536 static void netlink_inst_incoming(void *sst, struct buffer_if *buf)
538 struct netlink_client *c=sst;
539 struct netlink *st=c->nst;
541 netlink_incoming(st,c,buf);
/* Entry point for packets coming from the host, returned to the device
   driver by netlink_init(): sst is the struct netlink, and the NULL
   client marks the packet as originating from the host. */
544 static void netlink_dev_incoming(void *sst, struct buffer_if *buf)
546 struct netlink *st=sst;
548 netlink_incoming(st,NULL,buf);
/* Record a new link quality on every route that belongs to client c
   and, for routes that are not hard, call st->set_route() on them.
   Does nothing while the routing table has not been created.
   NOTE(review): up is presumably stored in the soft route before
   set_route() is called -- confirm. */
551 static void netlink_set_softlinks(struct netlink *st, struct netlink_client *c,
552 bool_t up, uint32_t quality)
556 if (!st->routes) return; /* Table has not yet been created */
557 for (i=0; i<st->n_routes; i++) {
558 if (st->routes[i].c==c) {
559 st->routes[i].quality=quality;
560 if (!st->routes[i].hard) {
562 st->set_route(st->dst,&st->routes[i]);
/* set_quality callback (installed as c->ops.set_quality): remember the
   link quality reported for client c, then update its soft routes,
   marking them down when the quality is LINK_QUALITY_DOWN and up
   otherwise. */
568 static void netlink_set_quality(void *sst, uint32_t quality)
570 struct netlink_client *c=sst;
571 struct netlink *st=c->nst;
573 c->link_quality=quality;
574 if (c->link_quality==LINK_QUALITY_DOWN) {
575 netlink_set_softlinks(st,c,False,c->link_quality);
577 netlink_set_softlinks(st,c,True,c->link_quality);
/* Log the routing table.  requested selects the message class:
   M_WARNING when the dump was asked for explicitly.
   Two layouts are produced: a point-to-point one that names the remote
   end (st->secnet_address) followed by the route networks, and a
   routing-table one with one line per route (tunnel name, hard or
   soft, free or restricted, up or down, quality, use count), followed
   by the netlink address itself and each host network in
   st->networks. */
581 static void netlink_dump_routes(struct netlink *st, bool_t requested)
587 if (requested) c=M_WARNING;
589 net=ipaddr_to_string(st->secnet_address);
590 Message(c,"%s: point-to-point (remote end is %s); routes:\n",
593 for (i=0; i<st->n_routes; i++) {
594 net=subnet_to_string(&st->routes[i].net);
595 Message(c,"%s ",net);
600 Message(c,"%s: routing table:\n",st->name);
601 for (i=0; i<st->n_routes; i++) {
602 net=subnet_to_string(&st->routes[i].net);
603 Message(c,"%s -> tunnel %s (%s,%s route,%s,quality %d,use %d)\n",net,
604 st->routes[i].c->name,
605 st->routes[i].hard?"hard":"soft",
606 st->routes[i].allow_route?"free":"restricted",
607 st->routes[i].up?"up":"down",
608 st->routes[i].quality,
609 st->routes[i].outcount);
612 net=ipaddr_to_string(st->secnet_address);
613 Message(c,"%s/32 -> netlink \"%s\" (use %d)\n",
614 net,st->name,st->localcount);
616 for (i=0; i<st->networks.entries; i++) {
617 net=subnet_to_string(&st->networks.list[i]);
618 Message(c,"%s -> host (use %d)\n",net,st->outcount);
/* qsort() comparator for struct netlink_route, ordering by net.len
   with larger values first, which is how netlink_phase_hook() gets its
   most-specific-first table.  Equal lengths compare equal; when a has
   the smaller net.len it sorts after b. */
624 static int netlink_compare_route_specificity(const void *ap, const void *bp)
626 const struct netlink_route *a=ap;
627 const struct netlink_route *b=bp;
629 if (a->net.len==b->net.len) return 0;
630 if (a->net.len<b->net.len) return 1;
/* Phase hook, registered for PHASE_SETUP by netlink_init().  Builds
   st->routes with one entry per network of every client, st->n_routes
   entries in total, then sorts them most-specific-first and dumps the
   table.  Per route: hard and allow_route are derived from the options
   of the owning client, kup starts False, quality is the current link
   quality of the client and outcount starts at zero.  A mismatch
   between the number of entries filled in and st->n_routes is
   fatal. */
634 static void netlink_phase_hook(void *sst, uint32_t new_phase)
636 struct netlink *st=sst;
637 struct netlink_client *c;
640 /* All the networks serviced by the various tunnels should now
641 * have been registered. We build a routing table by sorting the
642 * routes into most-specific-first order. */
643 st->routes=safe_malloc(st->n_routes*sizeof(*st->routes),
644 "netlink_phase_hook");
647 for (c=st->clients; c; c=c->next) {
648 for (j=0; j<c->networks.entries; j++) {
649 st->routes[i].net=c->networks.list[j];
651 /* Hard routes are always up;
652 soft routes default to down; routes with no 'deliver' function
654 st->routes[i].up=c->deliver?
655 (c->options&OPT_SOFTROUTE?False:True):
657 st->routes[i].kup=False;
658 st->routes[i].hard=c->options&OPT_SOFTROUTE?False:True;
659 st->routes[i].allow_route=c->options&OPT_ALLOWROUTE?
661 st->routes[i].quality=c->link_quality;
662 st->routes[i].outcount=0;
666 /* ASSERT i==st->n_routes */
667 if (i!=st->n_routes) {
668 fatal("netlink: route count error: expected %d got %d\n",
671 /* Sort the table in descending order of specificity */
672 qsort(st->routes,st->n_routes,sizeof(*st->routes),
673 netlink_compare_route_specificity);
675 netlink_dump_routes(st,False);
/* Signal callback registered for SIGUSR1 by netlink_init(): logs the
   request and dumps the routing table as an explicitly requested
   dump. */
678 static void netlink_signal_handler(void *sst, int signum)
680 struct netlink *st=sst;
681 Message(M_INFO,"%s: route dump requested by SIGUSR1\n",st->name);
682 netlink_dump_routes(st,True);
/* reg callback (installed as c->ops.reg).  Raises the netlink-wide
   st->max_start_pad and st->max_end_pad to at least the values this
   client asks for.
   NOTE(review): deliver and dst are presumably stored on the client
   for later use (c->deliver and c->dst are read elsewhere in this
   file) -- confirm. */
685 static void netlink_inst_reg(void *sst, netlink_deliver_fn *deliver,
686 void *dst, uint32_t max_start_pad,
687 uint32_t max_end_pad)
689 struct netlink_client *c=sst;
690 struct netlink *st=c->nst;
692 if (max_start_pad > st->max_start_pad) st->max_start_pad=max_start_pad;
693 if (max_end_pad > st->max_end_pad) st->max_end_pad=max_end_pad;
/* Maps the words accepted in the options list of a client to OPT_*
   flags; passed to string_list_to_word() in netlink_inst_create(). */
698 static struct flagstr netlink_option_table[]={
699 { "soft", OPT_SOFTROUTE },
700 { "allow-route", OPT_ALLOWROUTE },
/* Create one netlink client from its configuration dictionary.
   Reads the name, routes and options entries; refuses soft routes on a
   device that has no set_route function, and refuses routes that
   intersect st->exclude_remote_networks.  Soft routes currently set
   require_root_privileges.
   The new client gets a CL_NETLINK closure whose interface is its ops
   table (reg, deliver, set_quality), starts with link quality
   LINK_QUALITY_DOWN, and the number of its networks is added to
   st->n_routes. */
703 /* This is the routine that gets called when the closure that's
704 returned by an invocation of a netlink device closure (eg. tun,
705 userv-ipif) is invoked. It's used to create routes and pass in
706 information about them; the closure it returns is used by site
708 static closure_t *netlink_inst_create(struct netlink *st,
709 struct cloc loc, dict_t *dict)
711 struct netlink_client *c;
713 struct subnet_list networks;
716 name=dict_read_string(dict, "name", True, st->name, loc);
718 dict_read_subnet_list(dict, "routes", True, st->name, loc,
720 options=string_list_to_word(dict_lookup(dict,"options"),
721 netlink_option_table,st->name);
723 if ((options&OPT_SOFTROUTE) && !st->set_route) {
724 cfgfatal(loc,st->name,"this netlink device does not support "
729 if (options&OPT_SOFTROUTE) {
730 /* XXX for now we assume that soft routes require root privilege;
731 this may not always be true. The device driver can tell us. */
732 require_root_privileges=True;
733 require_root_privileges_explanation="netlink: soft routes";
735 cfgfatal(loc,st->name,"point-to-point netlinks do not support "
741 /* Check that nets do not intersect st->exclude_remote_networks;
742 refuse to register if they do. */
743 if (subnet_lists_intersect(&st->exclude_remote_networks,&networks)) {
744 cfgfatal(loc,st->name,"networks intersect with the explicitly "
745 "excluded remote networks\n");
749 c=safe_malloc(sizeof(*c),"netlink_inst_create");
750 c->cl.description=name;
751 c->cl.type=CL_NETLINK;
753 c->cl.interface=&c->ops;
755 c->ops.reg=netlink_inst_reg;
756 c->ops.deliver=netlink_inst_incoming;
757 c->ops.set_quality=netlink_set_quality;
760 c->networks=networks;
765 c->link_quality=LINK_QUALITY_DOWN;
768 st->n_routes+=networks.entries;
/* apply function of the netlink closure: expects a dictionary as its
   first argument, creates a client from it with netlink_inst_create()
   and returns the closure of that client wrapped by new_closure(). */
773 static list_t *netlink_inst_apply(closure_t *self, struct cloc loc,
774 dict_t *context, list_t *args)
776 struct netlink *st=self->interface;
782 Message(M_DEBUG_CONFIG,"netlink_inst_apply\n");
784 item=list_elem(args,0);
785 if (!item || item->type!=t_dict) {
786 cfgfatal(loc,st->name,"must have a dictionary argument\n");
788 dict=item->data.dict;
790 cl=netlink_inst_create(st,loc,dict);
792 return new_closure(cl);
/* Common initialisation shared by netlink device drivers.
   st:          netlink state to fill in.
   dst:         NOTE(review): presumably kept as st->dst, the handle
                passed back to set_route and to_host -- confirm.
   description: closure description, also the fallback device name.
   set_route:   stored in st->set_route; used for soft routes.
   to_host:     stored in st->deliver_to_host.
   Reads name, networks, exclude-remote-networks and mtu (default
   DEFAULT_MTU) from dict, together with exactly one of secnet-address
   or ptp-address, either of which ends up in st->secnet_address.
   Allocates the ICMP buffer and registers the PHASE_SETUP hook and the
   SIGUSR1 handler.  For a point-to-point device the single client is
   created here.  Returns netlink_dev_incoming, the function to call
   with packets arriving from the host. */
795 netlink_deliver_fn *netlink_init(struct netlink *st,
796 void *dst, struct cloc loc,
797 dict_t *dict, string_t description,
798 netlink_route_fn *set_route,
799 netlink_deliver_fn *to_host)
804 st->cl.description=description;
806 st->cl.apply=netlink_inst_apply;
811 st->set_route=set_route;
812 st->deliver_to_host=to_host;
814 st->name=dict_read_string(dict,"name",False,"netlink",loc);
815 if (!st->name) st->name=description;
816 dict_read_subnet_list(dict, "networks", True, "netlink", loc,
818 dict_read_subnet_list(dict, "exclude-remote-networks", False, "netlink",
819 loc, &st->exclude_remote_networks);
820 sa=dict_find_item(dict,"secnet-address",False,"netlink",loc);
821 ptpa=dict_find_item(dict,"ptp-address",False,"netlink",loc);
823 cfgfatal(loc,st->name,"you may not specify secnet-address and "
824 "ptp-address in the same netlink device\n");
827 cfgfatal(loc,st->name,"you must specify secnet-address or "
828 "ptp-address for this netlink device\n");
831 st->secnet_address=string_to_ipaddr(sa,"netlink");
834 st->secnet_address=string_to_ipaddr(ptpa,"netlink");
837 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
838 buffer_new(&st->icmp,ICMP_BUFSIZE);
844 add_hook(PHASE_SETUP,netlink_phase_hook,st);
845 request_signal_notification(SIGUSR1, netlink_signal_handler, st);
847 /* If we're point-to-point then we return a CL_NETLINK directly,
848 rather than a CL_NETLINK_OLD or pure closure (depending on
849 compatibility). This CL_NETLINK is for our one and only
850 client. Our cl.apply function is NULL. */
853 cl=netlink_inst_create(st,loc,dict);
856 return netlink_dev_incoming;
859 /* No connection to the kernel at all... */
/* set_route function of the null netlink.  There is no kernel to
   inform, so a change of state (route->up differing from route->kup)
   is only logged, and route->kup is then brought into line with
   route->up. */
865 static bool_t null_set_route(void *sst, struct netlink_route *route)
870 if (route->up!=route->kup) {
871 t=subnet_to_string(&route->net);
872 Message(M_INFO,"%s: setting route %s to state %s\n",st->nl.name,
873 t, route->up?"up":"down");
875 route->kup=route->up;
/* Buffer delivery callback of the null netlink.
   NOTE(review): presumably the to_host function handed to
   netlink_init() by null_apply() -- confirm. */
881 static void null_deliver(void *sst, struct buffer_if *buf)
/* apply function behind the null-netlink closure: allocates the device
   state, insists on a dictionary as first argument, initialises the
   embedded netlink through netlink_init() with null_set_route, and
   returns the closure of that netlink. */
886 static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
893 st=safe_malloc(sizeof(*st),"null_apply");
895 item=list_elem(args,0);
896 if (!item || item->type!=t_dict)
897 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
899 dict=item->data.dict;
901 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_set_route,
904 return new_closure(&st->nl.cl);
/* Module entry point: registers the null-netlink closure, implemented
   by null_apply(), in dict. */
907 init_module netlink_module;
908 void netlink_module(dict_t *dict)
910 add_closure(dict,"null-netlink",null_apply);