1 /* User-kernel network link */
3 /* Each netlink device is actually a router, with its own IP address.
4 We do things like decreasing the TTL and recalculating the header
5 checksum, generating ICMP, responding to pings, etc. */
7 /* This is where we have the anti-spoofing paranoia - before sending a
8 packet to the kernel we check that the tunnel it came over could
9 reasonably have produced it. */
15 /* Generic IP checksum routine */
16 static inline uint16_t ip_csum(uint8_t *iph,uint32_t count)
18 register uint32_t sum=0;
21 sum+=ntohs(*(uint16_t *)iph);
28 sum=(sum&0xffff)+(sum>>16);
34 * This is a version of ip_compute_csum() optimized for IP headers,
35 * which always checksum on 4 octet boundaries.
37 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
40 static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl) {
43 __asm__ __volatile__("
62 /* Since the input registers which are loaded with iph and ihl
63 are modified, we must also specify them as outputs, or gcc
64 will assume they contain their original values. */
65 : "=r" (sum), "=r" (iph), "=r" (ihl)
66 : "1" (iph), "2" (ihl));
70 static inline uint16_t ip_fast_csum(uint8_t *iph, uint32_t ihl)
72 return ip_csum(iph,ihl*4);
77 #if defined (WORDS_BIGENDIAN)
93 /* The options start here. */
116 static void netlink_packet_deliver(struct netlink *st,
117 struct netlink_client *client,
118 struct buffer_if *buf);
120 static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
121 uint32_t dest,uint16_t len)
125 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
126 buffer_init(&st->icmp,st->max_start_pad);
127 h=buf_append(&st->icmp,sizeof(*h));
132 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
137 h->iph.saddr=htonl(st->secnet_address);
138 h->iph.daddr=htonl(dest);
140 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
147 /* Fill in the ICMP checksum field correctly */
148 static void netlink_icmp_csum(struct icmphdr *h)
152 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
154 h->check=ip_csum(&h->type,len);
158 * An ICMP error message MUST NOT be sent as the result of
161 * * an ICMP error message, or
163 * * a datagram destined to an IP broadcast or IP multicast
166 * * a datagram sent as a link-layer broadcast, or
168 * * a non-initial fragment, or
170 * * a datagram whose source address does not define a single
171 * host -- e.g., a zero address, a loopback address, a
172 * broadcast address, a multicast address, or a Class E
175 static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
180 iph=(struct iphdr *)buf->start;
181 if (iph->protocol==1) return False; /* Overly-broad; we may reply to
182 eg. icmp echo-request */
183 /* How do we spot broadcast destination addresses? */
184 if (ntohs(iph->frag_off)&0x1fff) return False; /* Non-initial fragment */
185 source=ntohl(iph->saddr);
186 if (source==0) return False;
187 if ((source&0xff000000)==0x7f000000) return False;
188 /* How do we spot broadcast source addresses? */
189 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
190 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
194 /* How much of the original IP packet do we include in its ICMP
195 response? The header plus up to 64 bits. */
196 static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
198 struct iphdr *iph=(struct iphdr *)buf->start;
202 /* We include the first 8 bytes of the packet data, provided they exist */
204 plen=ntohs(iph->tot_len);
205 return (hlen>plen?plen:hlen);
208 /* client indicates where the packet we're constructing a response to
209 comes from. NULL indicates the host. */
210 static void netlink_icmp_simple(struct netlink *st, struct buffer_if *buf,
211 struct netlink_client *client,
212 uint8_t type, uint8_t code)
214 struct iphdr *iph=(struct iphdr *)buf->start;
218 if (netlink_icmp_may_reply(buf)) {
219 len=netlink_icmp_reply_len(buf);
220 h=netlink_icmp_tmpl(st,ntohl(iph->saddr),len);
221 h->type=type; h->code=code;
222 memcpy(buf_append(&st->icmp,len),buf->start,len);
223 netlink_icmp_csum(h);
224 netlink_packet_deliver(st,NULL,&st->icmp);
225 BUF_ASSERT_FREE(&st->icmp);
230 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
233 * Is the datagram acceptable?
235 * 1. Length at least the size of an ip header
237 * 3. Checksums correctly.
238 * 4. Doesn't have a bogus length
240 static bool_t netlink_check(struct netlink *st, struct buffer_if *buf)
242 struct iphdr *iph=(struct iphdr *)buf->start;
245 if (iph->ihl < 5 || iph->version != 4) return False;
246 if (buf->size < iph->ihl*4) return False;
247 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) return False;
248 len=ntohs(iph->tot_len);
249 /* There should be no padding */
250 if (buf->size!=len || len<(iph->ihl<<2)) return False;
251 /* XXX check that there's no source route specified */
255 /* Deliver a packet. "client" points to the _origin_ of the packet, not
256 its destination. (May be used when sending ICMP response - avoid
257 asymmetric routing.) */
258 static void netlink_packet_deliver(struct netlink *st,
259 struct netlink_client *client,
260 struct buffer_if *buf)
262 struct iphdr *iph=(struct iphdr *)buf->start;
263 uint32_t dest=ntohl(iph->daddr);
264 uint32_t source=ntohl(iph->saddr);
265 uint32_t best_quality;
269 BUF_ASSERT_USED(buf);
271 if (dest==st->secnet_address) {
272 Message(M_ERROR,"%s: trying to deliver a packet to myself!\n");
278 /* Origin of packet is host or secnet. Might be for a tunnel. */
281 for (i=0; i<st->n_routes; i++) {
282 if (st->routes[i].up && subnet_match(&st->routes[i].net,dest)) {
283 if (st->routes[i].c->link_quality>best_quality
284 || best_quality==0) {
285 best_quality=st->routes[i].c->link_quality;
287 /* If quality isn't perfect we may wish to
288 consider kicking the tunnel with a 0-length
289 packet to prompt it to perform a key setup.
290 Then it'll eventually decide it's up or
292 /* If quality is perfect we don't need to search
294 if (best_quality>=MAXIMUM_LINK_QUALITY) break;
298 if (best_match==-1) {
299 /* Not going down a tunnel. Might be for the host.
300 XXX think about this - only situation should be if we're
302 if (source!=st->secnet_address) {
303 Message(M_ERROR,"netlink_packet_deliver: outgoing packet "
304 "from host that won't fit down any of our tunnels!\n");
305 /* XXX I think this could also occur if a soft tunnel just
306 went down, but still had packets queued in the kernel. */
309 st->deliver_to_host(st->dst,NULL,buf);
310 BUF_ASSERT_FREE(buf);
313 if (best_quality>0) {
314 st->routes[best_match].c->deliver(
315 st->routes[best_match].c->dst,
316 st->routes[best_match].c, buf);
317 BUF_ASSERT_FREE(buf);
319 /* Generate ICMP destination unreachable */
320 netlink_icmp_simple(st,buf,client,3,0); /* client==NULL */
324 } else { /* client is set */
325 /* We know the origin is a tunnel - packet must be for the host */
326 if (subnet_matches_list(&st->networks,dest)) {
327 st->deliver_to_host(st->dst,NULL,buf);
328 BUF_ASSERT_FREE(buf);
330 Message(M_ERROR,"%s: packet from tunnel %s can't be delivered "
331 "to the host\n",st->name,client->name);
332 netlink_icmp_simple(st,buf,client,3,0);
336 BUF_ASSERT_FREE(buf);
339 static void netlink_packet_forward(struct netlink *st,
340 struct netlink_client *client,
341 struct buffer_if *buf)
343 struct iphdr *iph=(struct iphdr *)buf->start;
345 BUF_ASSERT_USED(buf);
347 /* Packet has already been checked */
349 /* Generate ICMP time exceeded */
350 netlink_icmp_simple(st,buf,client,11,0);
356 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
358 netlink_packet_deliver(st,client,buf);
359 BUF_ASSERT_FREE(buf);
362 /* Deal with packets addressed explicitly to us */
363 static void netlink_packet_local(struct netlink *st,
364 struct netlink_client *client,
365 struct buffer_if *buf)
369 h=(struct icmphdr *)buf->start;
371 if ((ntohs(h->iph.frag_off)&0xbfff)!=0) {
372 Message(M_WARNING,"%s: fragmented packet addressed to secnet; "
373 "ignoring it\n",st->name);
378 if (h->iph.protocol==1) {
380 if (h->type==8 && h->code==0) {
381 /* ICMP echo-request. Special case: we re-use the buffer
382 to construct the reply. */
384 h->iph.daddr=h->iph.saddr;
385 h->iph.saddr=htonl(st->secnet_address);
386 h->iph.ttl=255; /* Be nice and bump it up again... */
388 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
389 netlink_icmp_csum(h);
390 netlink_packet_deliver(st,NULL,buf);
393 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
395 /* Send ICMP protocol unreachable */
396 netlink_icmp_simple(st,buf,client,3,2);
404 /* If cid==NULL packet is from host, otherwise cid specifies which tunnel
406 static void netlink_incoming(void *sst, void *cid, struct buffer_if *buf)
408 struct netlink *st=sst;
409 struct netlink_client *client=cid;
410 uint32_t source,dest;
413 BUF_ASSERT_USED(buf);
414 if (!netlink_check(st,buf)) {
415 Message(M_WARNING,"%s: bad IP packet from %s\n",
416 st->name,client?client->name:"host");
420 iph=(struct iphdr *)buf->start;
422 source=ntohl(iph->saddr);
423 dest=ntohl(iph->daddr);
427 /* Check that the packet source is in 'nets' and its destination is
429 if (!subnet_matches_list(client->networks,source)) {
431 s=ipaddr_to_string(source);
432 d=ipaddr_to_string(dest);
433 Message(M_WARNING,"%s: packet from tunnel %s with bad "
434 "source address (s=%s,d=%s)\n",st->name,client->name,s,d);
440 if (!subnet_matches_list(&st->networks,source)) {
442 s=ipaddr_to_string(source);
443 d=ipaddr_to_string(dest);
444 Message(M_WARNING,"%s: outgoing packet with bad source address "
445 "(s=%s,d=%s)\n",st->name,s,d);
451 /* (st->secnet_address needs checking before matching destination
453 if (dest==st->secnet_address) {
454 netlink_packet_local(st,client,buf);
455 BUF_ASSERT_FREE(buf);
459 if (!subnet_matches_list(&st->networks,dest)) {
461 s=ipaddr_to_string(source);
462 d=ipaddr_to_string(dest);
463 Message(M_WARNING,"%s: incoming packet from tunnel %s "
464 "with bad destination address "
465 "(s=%s,d=%s)\n",st->name,client->name,s,d);
471 netlink_packet_forward(st,client,buf);
472 BUF_ASSERT_FREE(buf);
475 static void netlink_set_softlinks(struct netlink *st, struct netlink_client *c,
480 if (!st->routes) return; /* Table has not yet been created */
481 for (i=0; i<st->n_routes; i++) {
482 if (!st->routes[i].hard && st->routes[i].c==c) {
484 st->set_route(st->dst,&st->routes[i]);
489 static void netlink_set_quality(void *sst, void *cid, uint32_t quality)
491 struct netlink *st=sst;
492 struct netlink_client *c=cid;
494 c->link_quality=quality;
495 if (c->link_quality==LINK_QUALITY_DOWN) {
496 netlink_set_softlinks(st,c,False);
498 netlink_set_softlinks(st,c,True);
502 static void *netlink_regnets(void *sst, struct subnet_list *nets,
503 netlink_deliver_fn *deliver, void *dst,
504 uint32_t max_start_pad, uint32_t max_end_pad,
505 bool_t hard_routes, string_t client_name)
507 struct netlink *st=sst;
508 struct netlink_client *c;
510 Message(M_DEBUG_CONFIG,"netlink_regnets: request for %d networks, "
511 "max_start_pad=%d, max_end_pad=%d\n",
512 nets->entries,max_start_pad,max_end_pad);
514 if (!hard_routes && !st->set_route) {
515 Message(M_ERROR,"%s: this netlink device does not support "
521 /* XXX for now we assume that soft routes require root privilege;
522 this may not always be true. */
523 require_root_privileges=True;
524 require_root_privileges_explanation="netlink: soft routes";
528 /* XXX POLICY: do we check nets against local networks? If we do,
529 that prevents things like laptop tunnels working. Perhaps we
530 can have a configuration option for this. Or, if the admin
531 really doesn't want remote sites to be able to claim local
532 addresses, he can list them in exclude-remote-networks. */
533 if (subnet_lists_intersect(&st->networks,nets)) {
534 Message(M_ERROR,"%s: site %s specifies networks that "
535 "intersect with our local networks\n",st->name,client_name);
539 /* Check that nets do not intersect st->exclude_remote_networks;
540 refuse to register if they do. */
541 if (subnet_lists_intersect(&st->exclude_remote_networks,nets)) {
542 Message(M_ERROR,"%s: site %s specifies networks that "
543 "intersect with the explicitly excluded remote networks\n",
544 st->name,client_name);
548 c=safe_malloc(sizeof(*c),"netlink_regnets");
552 c->name=client_name; /* XXX copy it? */
553 c->hard_routes=hard_routes;
554 c->link_quality=LINK_QUALITY_DOWN;
557 if (max_start_pad > st->max_start_pad) st->max_start_pad=max_start_pad;
558 if (max_end_pad > st->max_end_pad) st->max_end_pad=max_end_pad;
559 st->n_routes+=nets->entries;
564 static void netlink_dump_routes(struct netlink *st)
569 Message(M_INFO,"%s: routing table:\n",st->name);
570 for (i=0; i<st->n_routes; i++) {
571 net=subnet_to_string(&st->routes[i].net);
572 Message(M_INFO,"%s -> tunnel %s (%s,%s)\n",net,st->routes[i].c->name,
573 st->routes[i].hard?"hard":"soft",
574 st->routes[i].up?"up":"down");
577 Message(M_INFO,"%s/32 -> netlink \"%s\"\n",
578 ipaddr_to_string(st->secnet_address),st->name);
579 for (i=0; i<st->networks.entries; i++) {
580 net=subnet_to_string(&st->networks.list[i]);
581 Message(M_INFO,"%s -> host\n",net);
586 static int netlink_compare_route_specificity(const void *ap, const void *bp)
588 const struct netlink_route *a=ap;
589 const struct netlink_route *b=bp;
591 if (a->net.len==b->net.len) return 0;
592 if (a->net.len<b->net.len) return 1;
596 static void netlink_phase_hook(void *sst, uint32_t new_phase)
598 struct netlink *st=sst;
599 struct netlink_client *c;
602 /* All the networks serviced by the various tunnels should now
603 * have been registered. We build a routing table by sorting the
604 * routes into most-specific-first order. */
605 st->routes=safe_malloc(st->n_routes*sizeof(*st->routes),
606 "netlink_phase_hook");
609 for (c=st->clients; c; c=c->next) {
610 for (j=0; j<c->networks->entries; j++) {
611 st->routes[i].net=c->networks->list[j];
613 st->routes[i].up=c->hard_routes; /* Hard routes are always up;
614 soft routes default to down */
615 st->routes[i].kup=False;
616 st->routes[i].hard=c->hard_routes;
620 /* ASSERT i==st->n_routes */
621 if (i!=st->n_routes) {
622 fatal("netlink: route count error: expected %d got %d\n",
625 /* Sort the table in descending order of specificity */
626 qsort(st->routes,st->n_routes,sizeof(*st->routes),
627 netlink_compare_route_specificity);
629 netlink_dump_routes(st);
632 netlink_deliver_fn *netlink_init(struct netlink *st,
633 void *dst, struct cloc loc,
634 dict_t *dict, string_t description,
635 netlink_route_fn *set_route,
636 netlink_deliver_fn *to_host)
639 st->cl.description=description;
640 st->cl.type=CL_NETLINK;
642 st->cl.interface=&st->ops;
644 st->ops.regnets=netlink_regnets;
645 st->ops.deliver=netlink_incoming;
646 st->ops.set_quality=netlink_set_quality;
650 st->set_route=set_route;
651 st->deliver_to_host=to_host;
653 st->name=dict_read_string(dict,"name",False,"netlink",loc);
654 if (!st->name) st->name=description;
655 dict_read_subnet_list(dict, "networks", True, "netlink", loc,
657 dict_read_subnet_list(dict, "exclude-remote-networks", False, "netlink",
658 loc, &st->exclude_remote_networks);
659 /* local-address and secnet-address do not have to be in local-networks;
660 however, they should be advertised in the 'sites' file for the
662 st->local_address=string_to_ipaddr(
663 dict_find_item(dict,"local-address", True, "netlink", loc),"netlink");
664 st->secnet_address=string_to_ipaddr(
665 dict_find_item(dict,"secnet-address", True, "netlink", loc),"netlink");
666 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
667 buffer_new(&st->icmp,ICMP_BUFSIZE);
671 add_hook(PHASE_SETUP,netlink_phase_hook,st);
673 return netlink_incoming;
676 /* No connection to the kernel at all... */
682 static bool_t null_set_route(void *sst, struct netlink_route *route)
687 if (route->up!=route->kup) {
688 t=subnet_to_string(&route->net);
689 Message(M_INFO,"%s: setting route %s to state %s\n",st->nl.name,
690 t, route->up?"up":"down");
692 route->kup=route->up;
698 static void null_deliver(void *sst, void *cid, struct buffer_if *buf)
703 static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
710 st=safe_malloc(sizeof(*st),"null_apply");
712 item=list_elem(args,0);
713 if (!item || item->type!=t_dict)
714 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
716 dict=item->data.dict;
718 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_set_route,
721 return new_closure(&st->nl.cl);
724 init_module netlink_module;
725 void netlink_module(dict_t *dict)
727 add_closure(dict,"null-netlink",null_apply);