1 /* User-kernel network link */
3 /* Each netlink device is actually a router, with its own IP address.
4 We do things like decreasing the TTL and recalculating the header
5 checksum, generating ICMP, responding to pings, etc. */
7 /* This is where we have the anti-spoofing paranoia - before sending a
8 packet to the kernel we check that the tunnel it came over could
9 reasonably have produced it. */
17 #define OPT_SOFTROUTE 1
18 #define OPT_ALLOWROUTE 2
20 /* Generic IP checksum routine */
21 static inline uint16_t ip_csum(uint8_t *iph,uint32_t count)
23 register uint32_t sum=0;
26 sum+=ntohs(*(uint16_t *)iph);
33 sum=(sum&0xffff)+(sum>>16);
39 * This is a version of ip_compute_csum() optimized for IP headers,
40 * which always checksum on 4 octet boundaries.
42 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
45 static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl) {
48 __asm__ __volatile__("
67 /* Since the input registers which are loaded with iph and ihl
68 are modified, we must also specify them as outputs, or gcc
69 will assume they contain their original values. */
70 : "=r" (sum), "=r" (iph), "=r" (ihl)
71 : "1" (iph), "2" (ihl));
/* Variant of ip_fast_csum() that uses no inline assembler.  ihl is the
   header length in 32-bit words; the generic routine wants a length in
   octets, so scale it before delegating. */
static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl)
{
    const uint32_t header_octets = ihl << 2;

    return ip_csum(iph, header_octets);
}
82 #if defined (WORDS_BIGENDIAN)
98 /* The options start here. */
121 static void netlink_packet_deliver(struct netlink *st,
122 struct netlink_client *client,
123 struct buffer_if *buf);
125 static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
126 uint32_t dest,uint16_t len)
130 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
131 buffer_init(&st->icmp,st->max_start_pad);
132 h=buf_append(&st->icmp,sizeof(*h));
137 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
142 h->iph.saddr=htonl(st->secnet_address);
143 h->iph.daddr=htonl(dest);
145 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
152 /* Fill in the ICMP checksum field correctly */
153 static void netlink_icmp_csum(struct icmphdr *h)
157 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
159 h->check=ip_csum(&h->type,len);
163 * An ICMP error message MUST NOT be sent as the result of
166 * * an ICMP error message, or
168 * * a datagram destined to an IP broadcast or IP multicast
171 * * a datagram sent as a link-layer broadcast, or
173 * * a non-initial fragment, or
175 * * a datagram whose source address does not define a single
176 * host -- e.g., a zero address, a loopback address, a
177 * broadcast address, a multicast address, or a Class E
180 static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
183 struct icmphdr *icmph;
186 iph=(struct iphdr *)buf->start;
187 icmph=(struct icmphdr *)buf->start;
188 if (iph->protocol==1) {
189 switch(icmph->type) {
190 case 3: /* Destination unreachable */
191 case 11: /* Time Exceeded */
192 case 12: /* Parameter Problem */
196 /* How do we spot broadcast destination addresses? */
197 if (ntohs(iph->frag_off)&0x1fff) return False; /* Non-initial fragment */
198 source=ntohl(iph->saddr);
199 if (source==0) return False;
200 if ((source&0xff000000)==0x7f000000) return False;
201 /* How do we spot broadcast source addresses? */
202 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
203 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
207 /* How much of the original IP packet do we include in its ICMP
208 response? The header plus up to 64 bits. */
209 static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
211 struct iphdr *iph=(struct iphdr *)buf->start;
215 /* We include the first 8 bytes of the packet data, provided they exist */
217 plen=ntohs(iph->tot_len);
218 return (hlen>plen?plen:hlen);
221 /* client indicates where the packet we're constructing a response to
222 comes from. NULL indicates the host. */
223 static void netlink_icmp_simple(struct netlink *st, struct buffer_if *buf,
224 struct netlink_client *client,
225 uint8_t type, uint8_t code)
227 struct iphdr *iph=(struct iphdr *)buf->start;
231 if (netlink_icmp_may_reply(buf)) {
232 len=netlink_icmp_reply_len(buf);
233 h=netlink_icmp_tmpl(st,ntohl(iph->saddr),len);
234 h->type=type; h->code=code;
235 memcpy(buf_append(&st->icmp,len),buf->start,len);
236 netlink_icmp_csum(h);
237 netlink_packet_deliver(st,NULL,&st->icmp);
238 BUF_ASSERT_FREE(&st->icmp);
243 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
246 * Is the datagram acceptable?
248 * 1. Length at least the size of an ip header
250 * 3. Checksums correctly.
251 * 4. Doesn't have a bogus length
253 static bool_t netlink_check(struct netlink *st, struct buffer_if *buf)
255 struct iphdr *iph=(struct iphdr *)buf->start;
258 if (iph->ihl < 5 || iph->version != 4) return False;
259 if (buf->size < iph->ihl*4) return False;
260 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) return False;
261 len=ntohs(iph->tot_len);
262 /* There should be no padding */
263 if (buf->size!=len || len<(iph->ihl<<2)) return False;
264 /* XXX check that there's no source route specified */
268 /* Deliver a packet. "client" is the _origin_ of the packet, not its
269 destination, and is NULL for packets from the host and packets
270 generated internally in secnet. */
271 static void netlink_packet_deliver(struct netlink *st,
272 struct netlink_client *client,
273 struct buffer_if *buf)
275 struct iphdr *iph=(struct iphdr *)buf->start;
276 uint32_t dest=ntohl(iph->daddr);
277 uint32_t source=ntohl(iph->saddr);
278 uint32_t best_quality;
279 bool_t allow_route=False;
280 bool_t found_allowed=False;
284 BUF_ASSERT_USED(buf);
286 if (dest==st->secnet_address) {
287 Message(M_ERR,"%s: trying to deliver a packet to myself!\n");
292 /* Packets from the host (client==NULL) may always be routed. Packets
293 from clients with the allow_route option will also be routed. */
294 if (!client || (client && (client->options & OPT_ALLOWROUTE)))
297 /* If !allow_route, we check the routing table anyway, and if
298 there's a suitable route with OPT_ALLOWROUTE set we use it. If
299 there's a suitable route, but none with OPT_ALLOWROUTE set then
300 we generate ICMP 'communication with destination network
301 administratively prohibited'. */
305 for (i=0; i<st->n_clients; i++) {
306 if (st->routes[i]->up &&
307 ipset_contains_addr(st->routes[i]->networks,dest)) {
308 /* It's an available route to the correct destination. But is
309 it better than the one we already have? */
311 /* If we have already found an allowed route then we don't
312 bother looking at routes we're not allowed to use. If
313 we don't yet have an allowed route we'll consider any. */
314 if (!allow_route && found_allowed) {
315 if (!(st->routes[i]->options&OPT_ALLOWROUTE)) continue;
318 if (st->routes[i]->link_quality>best_quality
319 || best_quality==0) {
320 best_quality=st->routes[i]->link_quality;
322 if (st->routes[i]->options&OPT_ALLOWROUTE)
324 /* If quality isn't perfect we may wish to
325 consider kicking the tunnel with a 0-length
326 packet to prompt it to perform a key setup.
327 Then it'll eventually decide it's up or
329 /* If quality is perfect and we're allowed to use the
330 route we don't need to search any more. */
331 if (best_quality>=MAXIMUM_LINK_QUALITY &&
332 (allow_route || found_allowed)) break;
336 if (best_match==-1) {
337 /* The packet's not going down a tunnel. It might (ought to)
339 if (ipset_contains_addr(st->networks,dest)) {
340 st->deliver_to_host(st->dst,buf);
342 BUF_ASSERT_FREE(buf);
345 s=ipaddr_to_string(source);
346 d=ipaddr_to_string(dest);
347 Message(M_ERR,"%s: don't know where to deliver packet "
348 "(s=%s, d=%s)\n", st->name, s, d);
350 netlink_icmp_simple(st,buf,client,3,0);
355 !(st->routes[best_match]->options&OPT_ALLOWROUTE)) {
357 s=ipaddr_to_string(source);
358 d=ipaddr_to_string(dest);
359 /* We have a usable route but aren't allowed to use it.
360 Generate ICMP destination unreachable: communication
361 with destination network administratively prohibited */
362 Message(M_NOTICE,"%s: denied forwarding for packet (s=%s, d=%s)\n",
366 netlink_icmp_simple(st,buf,client,3,9);
369 if (best_quality>0) {
370 /* XXX Fragment if required */
371 st->routes[best_match]->deliver(
372 st->routes[best_match]->dst, buf);
373 st->routes[best_match]->outcount++;
374 BUF_ASSERT_FREE(buf);
376 /* Generate ICMP destination unreachable */
377 netlink_icmp_simple(st,buf,client,3,0); /* client==NULL */
381 BUF_ASSERT_FREE(buf);
384 static void netlink_packet_forward(struct netlink *st,
385 struct netlink_client *client,
386 struct buffer_if *buf)
388 struct iphdr *iph=(struct iphdr *)buf->start;
390 BUF_ASSERT_USED(buf);
392 /* Packet has already been checked */
394 /* Generate ICMP time exceeded */
395 netlink_icmp_simple(st,buf,client,11,0);
401 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
403 netlink_packet_deliver(st,client,buf);
404 BUF_ASSERT_FREE(buf);
407 /* Deal with packets addressed explicitly to us */
408 static void netlink_packet_local(struct netlink *st,
409 struct netlink_client *client,
410 struct buffer_if *buf)
416 h=(struct icmphdr *)buf->start;
418 if ((ntohs(h->iph.frag_off)&0xbfff)!=0) {
419 Message(M_WARNING,"%s: fragmented packet addressed to secnet; "
420 "ignoring it\n",st->name);
425 if (h->iph.protocol==1) {
427 if (h->type==8 && h->code==0) {
428 /* ICMP echo-request. Special case: we re-use the buffer
429 to construct the reply. */
431 h->iph.daddr=h->iph.saddr;
432 h->iph.saddr=htonl(st->secnet_address);
433 h->iph.ttl=255; /* Be nice and bump it up again... */
435 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
436 netlink_icmp_csum(h);
437 netlink_packet_deliver(st,NULL,buf);
440 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
442 /* Send ICMP protocol unreachable */
443 netlink_icmp_simple(st,buf,client,3,2);
451 /* If client==NULL packet is from host, otherwise client specifies which tunnel
453 static void netlink_incoming(struct netlink *st, struct netlink_client *client,
454 struct buffer_if *buf)
456 uint32_t source,dest;
459 BUF_ASSERT_USED(buf);
460 if (!netlink_check(st,buf)) {
461 Message(M_WARNING,"%s: bad IP packet from %s\n",
462 st->name,client?client->name:"host");
466 iph=(struct iphdr *)buf->start;
468 source=ntohl(iph->saddr);
469 dest=ntohl(iph->daddr);
471 /* Check source. If we don't like the source, there's no point
472 generating ICMP because we won't know how to get it to the
473 source of the packet. */
475 /* Check that the packet source is appropriate for the tunnel
477 if (!ipset_contains_addr(client->networks,source)) {
479 s=ipaddr_to_string(source);
480 d=ipaddr_to_string(dest);
481 Message(M_WARNING,"%s: packet from tunnel %s with bad "
482 "source address (s=%s,d=%s)\n",st->name,client->name,s,d);
488 /* Check that the packet originates in our configured local
489 network, and hasn't been forwarded from elsewhere or
490 generated with the wrong source address */
491 if (!ipset_contains_addr(st->networks,source)) {
493 s=ipaddr_to_string(source);
494 d=ipaddr_to_string(dest);
495 Message(M_WARNING,"%s: outgoing packet with bad source address "
496 "(s=%s,d=%s)\n",st->name,s,d);
503 /* If this is a point-to-point device we don't examine the
504 destination address at all; we blindly send it down our
505 one-and-only registered tunnel, or to the host, depending on
506 where it came from. It's up to external software to check
507 address validity and generate ICMP, etc. */
510 st->deliver_to_host(st->dst,buf);
512 st->clients->deliver(st->clients->dst,buf);
514 BUF_ASSERT_FREE(buf);
518 /* st->secnet_address needs checking before matching destination
520 if (dest==st->secnet_address) {
521 netlink_packet_local(st,client,buf);
522 BUF_ASSERT_FREE(buf);
525 netlink_packet_forward(st,client,buf);
526 BUF_ASSERT_FREE(buf);
529 static void netlink_inst_incoming(void *sst, struct buffer_if *buf)
531 struct netlink_client *c=sst;
532 struct netlink *st=c->nst;
534 netlink_incoming(st,c,buf);
537 static void netlink_dev_incoming(void *sst, struct buffer_if *buf)
539 struct netlink *st=sst;
541 netlink_incoming(st,NULL,buf);
544 static void netlink_set_quality(void *sst, uint32_t quality)
546 struct netlink_client *c=sst;
547 struct netlink *st=c->nst;
549 c->link_quality=quality;
550 c->up=(c->link_quality==LINK_QUALITY_DOWN)?False:True;
551 if (c->options&OPT_SOFTROUTE) {
552 st->set_routes(st->dst,c);
556 static void netlink_output_subnets(struct netlink *st, uint32_t loglevel,
557 struct subnet_list *snets)
562 for (i=0; i<snets->entries; i++) {
563 net=subnet_to_string(snets->list[i]);
564 Message(loglevel,"%s ",net);
569 static void netlink_dump_routes(struct netlink *st, bool_t requested)
575 if (requested) c=M_WARNING;
577 net=ipaddr_to_string(st->secnet_address);
578 Message(c,"%s: point-to-point (remote end is %s); routes:\n",
581 netlink_output_subnets(st,c,st->clients->subnets);
584 Message(c,"%s: routing table:\n",st->name);
585 for (i=0; i<st->n_clients; i++) {
586 netlink_output_subnets(st,c,st->routes[i]->subnets);
587 Message(c,"-> tunnel %s (%s,%s routes,%s,quality %d,use %d)\n",
589 st->routes[i]->options&OPT_SOFTROUTE?"soft":"hard",
590 st->routes[i]->options&OPT_ALLOWROUTE?"free":"restricted",
591 st->routes[i]->up?"up":"down",
592 st->routes[i]->link_quality,
593 st->routes[i]->outcount);
595 net=ipaddr_to_string(st->secnet_address);
596 Message(c,"%s/32 -> netlink \"%s\" (use %d)\n",
597 net,st->name,st->localcount);
599 for (i=0; i<st->subnets->entries; i++) {
600 net=subnet_to_string(st->subnets->list[i]);
601 Message(c,"%s ",net);
605 Message(c,"-> host (use %d)\n",st->outcount);
609 /* ap is a pointer to a member of the routes array */
610 static int netlink_compare_client_priority(const void *ap, const void *bp)
612 const struct netlink_client *const*a=ap;
613 const struct netlink_client *const*b=bp;
615 if ((*a)->priority==(*b)->priority) return 0;
616 if ((*a)->priority<(*b)->priority) return 1;
620 static void netlink_phase_hook(void *sst, uint32_t new_phase)
622 struct netlink *st=sst;
623 struct netlink_client *c;
626 /* All the networks serviced by the various tunnels should now
627 * have been registered. We build a routing table by sorting the
628 * clients by priority. */
629 st->routes=safe_malloc(st->n_clients*sizeof(*st->routes),
630 "netlink_phase_hook");
633 for (c=st->clients; c; c=c->next)
635 /* Sort the table in descending order of priority */
636 qsort(st->routes,st->n_clients,sizeof(*st->routes),
637 netlink_compare_client_priority);
639 netlink_dump_routes(st,False);
642 static void netlink_signal_handler(void *sst, int signum)
644 struct netlink *st=sst;
645 Message(M_INFO,"%s: route dump requested by SIGUSR1\n",st->name);
646 netlink_dump_routes(st,True);
/* output_config operation for a netlink client.  Nothing is written to
   the buffer at present; we only assert that the caller handed us a
   buffer that is in use.  sst is unused. */
static void netlink_inst_output_config(void *sst, struct buffer_if *buf)
{
    BUF_ASSERT_USED(buf);
}
658 static bool_t netlink_inst_check_config(void *sst, struct buffer_if *buf)
660 /* struct netlink_client *c=sst; */
661 /* struct netlink *st=c->nst; */
663 BUF_ASSERT_USED(buf);
664 /* We need to eat all of the configuration information from the buffer
665 for backward compatibility. */
670 static void netlink_inst_set_mtu(void *sst, uint32_t new_mtu)
672 struct netlink_client *c=sst;
677 static void netlink_inst_reg(void *sst, netlink_deliver_fn *deliver,
678 void *dst, uint32_t max_start_pad,
679 uint32_t max_end_pad)
681 struct netlink_client *c=sst;
682 struct netlink *st=c->nst;
684 if (max_start_pad > st->max_start_pad) st->max_start_pad=max_start_pad;
685 if (max_end_pad > st->max_end_pad) st->max_end_pad=max_end_pad;
690 static struct flagstr netlink_option_table[]={
691 { "soft", OPT_SOFTROUTE },
692 { "allow-route", OPT_ALLOWROUTE },
695 /* This is the routine that gets called when the closure that's
696 returned by an invocation of a netlink device closure (eg. tun,
697 userv-ipif) is invoked. It's used to create routes and pass in
698 information about them; the closure it returns is used by site
700 static closure_t *netlink_inst_create(struct netlink *st,
701 struct cloc loc, dict_t *dict)
703 struct netlink_client *c;
705 struct ipset *networks;
706 uint32_t options,priority,mtu;
709 name=dict_read_string(dict, "name", True, st->name, loc);
711 l=dict_lookup(dict,"routes");
713 cfgfatal(loc,st->name,"required parameter \"routes\" not found\n");
714 networks=string_list_to_ipset(l,loc,st->name,"routes");
715 options=string_list_to_word(dict_lookup(dict,"options"),
716 netlink_option_table,st->name);
718 priority=dict_read_number(dict,"priority",False,st->name,loc,0);
719 mtu=dict_read_number(dict,"mtu",False,st->name,loc,0);
721 if ((options&OPT_SOFTROUTE) && !st->set_routes) {
722 cfgfatal(loc,st->name,"this netlink device does not support "
727 if (options&OPT_SOFTROUTE) {
728 /* XXX for now we assume that soft routes require root privilege;
729 this may not always be true. The device driver can tell us. */
730 require_root_privileges=True;
731 require_root_privileges_explanation="netlink: soft routes";
733 cfgfatal(loc,st->name,"point-to-point netlinks do not support "
739 /* Check that nets are a subset of st->remote_networks;
740 refuse to register if they are not. */
741 if (!ipset_is_subset(st->remote_networks,networks)) {
742 cfgfatal(loc,st->name,"routes are not allowed\n");
746 c=safe_malloc(sizeof(*c),"netlink_inst_create");
747 c->cl.description=name;
748 c->cl.type=CL_NETLINK;
750 c->cl.interface=&c->ops;
752 c->ops.reg=netlink_inst_reg;
753 c->ops.deliver=netlink_inst_incoming;
754 c->ops.set_quality=netlink_set_quality;
755 c->ops.output_config=netlink_inst_output_config;
756 c->ops.check_config=netlink_inst_check_config;
757 c->ops.set_mtu=netlink_inst_set_mtu;
760 c->networks=networks;
761 c->subnets=ipset_to_subnet_list(networks);
762 c->priority=priority;
766 c->link_quality=LINK_QUALITY_DOWN;
767 c->mtu=mtu?mtu:st->mtu;
779 static list_t *netlink_inst_apply(closure_t *self, struct cloc loc,
780 dict_t *context, list_t *args)
782 struct netlink *st=self->interface;
788 item=list_elem(args,0);
789 if (!item || item->type!=t_dict) {
790 cfgfatal(loc,st->name,"must have a dictionary argument\n");
792 dict=item->data.dict;
794 cl=netlink_inst_create(st,loc,dict);
796 return new_closure(cl);
799 netlink_deliver_fn *netlink_init(struct netlink *st,
800 void *dst, struct cloc loc,
801 dict_t *dict, string_t description,
802 netlink_route_fn *set_routes,
803 netlink_deliver_fn *to_host)
809 st->cl.description=description;
811 st->cl.apply=netlink_inst_apply;
818 st->set_routes=set_routes;
819 st->deliver_to_host=to_host;
821 st->name=dict_read_string(dict,"name",False,description,loc);
822 if (!st->name) st->name=description;
823 l=dict_lookup(dict,"networks");
825 st->networks=string_list_to_ipset(l,loc,st->name,"networks");
827 Message(M_WARNING,"%s: no local networks (parameter \"networks\") "
828 "defined\n",st->name);
829 st->networks=ipset_new();
831 l=dict_lookup(dict,"remote-networks");
833 st->remote_networks=string_list_to_ipset(l,loc,st->name,
838 st->remote_networks=ipset_complement(empty);
842 sa=dict_find_item(dict,"secnet-address",False,"netlink",loc);
843 ptpa=dict_find_item(dict,"ptp-address",False,"netlink",loc);
845 cfgfatal(loc,st->name,"you may not specify secnet-address and "
846 "ptp-address in the same netlink device\n");
849 cfgfatal(loc,st->name,"you must specify secnet-address or "
850 "ptp-address for this netlink device\n");
853 st->secnet_address=string_item_to_ipaddr(sa,"netlink");
856 st->secnet_address=string_item_to_ipaddr(ptpa,"netlink");
859 /* To be strictly correct we could subtract secnet_address from
860 networks here. It shouldn't make any practical difference,
861 though, and will make the route dump look complicated... */
862 st->subnets=ipset_to_subnet_list(st->networks);
863 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
864 buffer_new(&st->icmp,ICMP_BUFSIZE);
868 add_hook(PHASE_SETUP,netlink_phase_hook,st);
869 request_signal_notification(SIGUSR1, netlink_signal_handler, st);
871 /* If we're point-to-point then we return a CL_NETLINK directly,
872 rather than a CL_NETLINK_OLD or pure closure (depending on
873 compatibility). This CL_NETLINK is for our one and only
874 client. Our cl.apply function is NULL. */
877 cl=netlink_inst_create(st,loc,dict);
880 return netlink_dev_incoming;
883 /* No connection to the kernel at all... */
889 static bool_t null_set_route(void *sst, struct netlink_client *routes)
893 if (routes->up!=routes->kup) {
894 Message(M_INFO,"%s: setting routes for tunnel %s to state %s\n",
895 st->nl.name,routes->name,
896 routes->up?"up":"down");
897 routes->kup=routes->up;
903 static void null_deliver(void *sst, struct buffer_if *buf)
908 static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
915 st=safe_malloc(sizeof(*st),"null_apply");
917 item=list_elem(args,0);
918 if (!item || item->type!=t_dict)
919 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
921 dict=item->data.dict;
923 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_set_route,
926 return new_closure(&st->nl.cl);
929 init_module netlink_module;
930 void netlink_module(dict_t *dict)
932 add_closure(dict,"null-netlink",null_apply);