1 /* User-kernel network link */
3 /* See RFCs 791, 792, 1123 and 1812 */
5 /* The netlink device is actually a router. Tunnels are unnumbered
6 point-to-point lines (RFC1812 section 2.2.7); the router has a
7 single address (the 'router-id'). */
9 /* This is where we currently have the anti-spoofing paranoia - before
10 sending a packet to the kernel we check that the tunnel it came
11 over could reasonably have produced it. */
14 /* Points to note from RFC1812 (which may require changes in this
17 3.3.4 Maximum Transmission Unit - MTU
19 The MTU of each logical interface MUST be configurable within the
20 range of legal MTUs for the interface.
22 Many Link Layer protocols define a maximum frame size that may be
23 sent. In such cases, a router MUST NOT allow an MTU to be set which
24 would allow sending of frames larger than those allowed by the Link
25 Layer protocol. However, a router SHOULD be willing to receive a
26 packet as large as the maximum frame size even if that is larger than
29 4.2.1 A router SHOULD count datagrams discarded.
31 4.2.2.1 Source route options - we probably should implement processing
32 of source routes, even though mostly the security policy will prevent
35 5.3.13.4 Source Route Options
37 A router MUST implement support for source route options in forwarded
38 packets. A router MAY implement a configuration option that, when
39 enabled, causes all source-routed packets to be discarded. However,
40 such an option MUST NOT be enabled by default.
42 5.3.13.5 Record Route Option
44 Routers MUST support the Record Route option in forwarded packets.
46 A router MAY provide a configuration option that, if enabled, will
47 cause the router to ignore (i.e., pass through unchanged) Record
48 Route options in forwarded packets. If provided, such an option MUST
49 default to enabling the record-route. This option should not affect
50 the processing of Record Route options in datagrams received by the
51 router itself (in particular, Record Route options in ICMP echo
52 requests will still be processed according to Section [4.3.3.6]).
54 5.3.13.6 Timestamp Option
56 Routers MUST support the timestamp option in forwarded packets. A
57 timestamp value MUST follow the rules given [INTRO:2].
59 If the flags field = 3 (timestamp and prespecified address), the
60 router MUST add its timestamp if the next prespecified address
61 matches any of the router's IP addresses. It is not necessary that
62 the prespecified address be either the address of the interface on
63 which the packet arrived or the address of the interface over which
67 4.2.2.7 Fragmentation: RFC 791 Section 3.2
69 Fragmentation, as described in [INTERNET:1], MUST be supported by a
72 4.2.2.8 Reassembly: RFC 791 Section 3.2
74 As specified in the corresponding section of [INTRO:2], a router MUST
75 support reassembly of datagrams that it delivers to itself.
77 4.2.2.9 Time to Live: RFC 791 Section 3.2
79 Note in particular that a router MUST NOT check the TTL of a packet
80 except when forwarding it.
82 A router MUST NOT discard a datagram just because it was received
83 with TTL equal to zero or one; if it is to the router and otherwise
84 valid, the router MUST attempt to receive it.
86 On messages the router originates, the IP layer MUST provide a means
87 for the transport layer to set the TTL field of every datagram that
88 is sent. When a fixed TTL value is used, it MUST be configurable.
91 8.1 The Simple Network Management Protocol - SNMP
92 8.1.1 SNMP Protocol Elements
94 Routers MUST be manageable by SNMP [MGT:3]. The SNMP MUST operate
95 using UDP/IP as its transport and network protocols.
/* Debug-trace macro.  In the NETLINK_DEBUG branch (as labelled on the
   #else/#endif lines) MDEBUG forwards its arguments to
   Message(M_DEBUG, ...); otherwise it expands to a no-op expression, so
   call sites need no conditional compilation of their own. */
110 #define MDEBUG(...) Message(M_DEBUG, __VA_ARGS__)
111 #else /* !NETLINK_DEBUG */
112 #define MDEBUG(...) ((void)0)
113 #endif /* !NETLINK_DEBUG */
/* ICMP type and code numbers used in this file.  Each ICMP_CODE_* value
   is listed under the ICMP_TYPE_* it is paired with in the
   netlink_icmp_simple() calls further down: the four codes 0, 2, 4 and
   13 go with ICMP_TYPE_UNREACHABLE, and ICMP_CODE_TTL_EXCEEDED goes
   with ICMP_TYPE_TIME_EXCEEDED. */
115 #define ICMP_TYPE_ECHO_REPLY 0
117 #define ICMP_TYPE_UNREACHABLE 3
118 #define ICMP_CODE_NET_UNREACHABLE 0
119 #define ICMP_CODE_PROTOCOL_UNREACHABLE 2
120 #define ICMP_CODE_FRAGMENTATION_REQUIRED 4
121 #define ICMP_CODE_NET_PROHIBITED 13
123 #define ICMP_TYPE_ECHO_REQUEST 8
125 #define ICMP_TYPE_TIME_EXCEEDED 11
126 #define ICMP_CODE_TTL_EXCEEDED 0
128 /* Generic IP checksum routine */
/* Sums `count' bytes starting at iph.  Steps visible here: 16-bit words
   are added after conversion with ntohs(), a single byte is added on
   its own, and the bits above bit 15 are folded back into the low 16
   bits.  The loop conditions and the final return expression are not
   shown in this excerpt, so whether the result is complemented, and in
   which byte order it is returned, must be read from the full file.
   NOTE(review): the (uint16_t *) cast discards const and assumes iph is
   suitably aligned for a 16-bit load -- confirm callers guarantee it. */
129 static inline uint16_t ip_csum(const uint8_t *iph,int32_t count)
131 register uint32_t sum=0;
134 sum+=ntohs(*(uint16_t *)iph);
139 sum+=*(uint8_t *)iph;
141 sum=(sum&0xffff)+(sum>>16);
147 * This is a version of ip_compute_csum() optimized for IP headers,
148 * which always checksum on 4 octet boundaries.
150 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
/* ip_fast_csum, inline-assembly variant: checksums an IP header that
   starts at iph and is ihl 32-bit words long.  The adcl instructions
   add successive 32-bit words of the header with carry.  Which build
   configuration selects this variant rather than the portable one is
   not visible in this excerpt. */
153 static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl) {
156 __asm__ __volatile__(
162 "adcl 12(%1), %0 ;\n"
163 "1: adcl 16(%1), %0 ;\n"
174 /* Since the input registers which are loaded with iph and ihl
175 are modified, we must also specify them as outputs, or gcc
176 will assume they contain their original values. */
177 : "=r" (sum), "=r" (iph), "=r" (ihl)
178 : "1" (iph), "2" (ihl)
/* ip_fast_csum, portable variant: checksums an IP header of ihl 32-bit
   words by calling ip_csum() with the length converted to bytes.  The
   assert guards the ihl*4 multiplication against overflow.
   NOTE(review): iph is non-const here but const in the inline-assembly
   variant; netlink_maybe_fragment passes a (const void*) argument,
   which would discard the qualifier with this variant -- consider
   making the parameter const uint8_t * here as well. */
183 static inline uint16_t ip_fast_csum(uint8_t *iph, int32_t ihl)
185 assert(ihl < INT_MAX/4);
186 return ip_csum(iph,ihl*4);
191 #if defined (WORDS_BIGENDIAN)
/* Masks for the 16-bit flags/fragment-offset field (iph->frag), applied
   to the value after ntohs().  The low 13 bits are the fragment offset,
   which netlink_maybe_fragment scales by 8 to get a byte offset;
   0x2000 is "more fragments" and 0x4000 is "don't fragment". */
202 #define IPHDR_FRAG_OFF ((uint16_t)0x1fff)
203 #define IPHDR_FRAG_MORE ((uint16_t)0x2000)
204 #define IPHDR_FRAG_DONT ((uint16_t)0x4000)
205 /* reserved 0x8000 */
211 /* The options start here. */
219 union icmpinfofield {
/* Zero-initialised icmpinfofield (static storage, no initialiser).  It
   is passed as the `info' argument of netlink_icmp_simple() by callers
   that have nothing to put in that field. */
238 static const union icmpinfofield icmp_noinfo;
/* Printable name for the origin of a packet, for use in log messages:
   the client's name, or the literal "(local)" when sender is NULL. */
240 static const char *sender_name(struct netlink_client *sender /* or NULL */)
242 return sender?sender->name:"(local)";
/* Forward declaration: netlink_icmp_simple() hands its reply to
   netlink_packet_deliver(), which is defined further down.  The second
   parameter is called `client' here and `sender' in the definition. */
245 static void netlink_packet_deliver(struct netlink *st,
246 struct netlink_client *client,
247 struct buffer_if *buf);
249 /* XXX RFC1812 4.3.2.5:
250 All other ICMP error messages (Destination Unreachable,
251 Redirect, Time Exceeded, and Parameter Problem) SHOULD have their
252 precedence value set to 6 (INTERNETWORK CONTROL) or 7 (NETWORK
253 CONTROL). The IP Precedence value for these error messages MAY be
/* Start a new ICMP packet in st->icmp and fill in its IP header.
   dest is the destination address in host byte order (converted with
   htonl here).  len is the number of bytes that will follow the ICMP
   header: tot_len is set to len + 4*ihl + 8, the 8 presumably being the
   ICMP header itself.  The source address is st->secnet_address, the
   TTL is fixed at 255, and the IP header checksum is computed after the
   fields shown.  st->icmp is allocated here (BUF_ALLOC) and
   re-initialised with calculate_max_start_pad() bytes of start padding.
   The remaining header assignments and the return statement are not
   shown in this excerpt; the caller uses the result as the header just
   appended. */
256 static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
257 uint32_t dest,uint16_t len)
261 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
262 buffer_init(&st->icmp,calculate_max_start_pad());
263 h=buf_append(&st->icmp,sizeof(*h));
268 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
271 h->iph.ttl=255; /* XXX should be configurable */
273 h->iph.saddr=htonl(st->secnet_address);
274 h->iph.daddr=htonl(dest);
276 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
283 /* Fill in the ICMP checksum field correctly */
/* The checksum covers everything after the IP header: the length is
   tot_len minus 4*ihl, and summing starts at the ICMP type byte.
   NOTE(review): h->check has to be zero while the sum is taken for the
   stored value to be valid; that step is not visible in this excerpt
   -- confirm it is done on the line preceding the ip_csum() call. */
284 static void netlink_icmp_csum(struct icmphdr *h)
288 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
290 h->check=ip_csum(&h->type,len);
294 * An ICMP error message MUST NOT be sent as the result of
297 * * an ICMP error message, or
299 * * a datagram destined to an IP broadcast or IP multicast
302 * * a datagram sent as a link-layer broadcast, or
304 * * a non-initial fragment, or
306 * * a datagram whose source address does not define a single
307 * host -- e.g., a zero address, a loopback address, a
308 * broadcast address, a multicast address, or a Class E
/* Decide whether an ICMP error may be generated in response to the IP
   packet in buf.  Checks visible here: the buffer must hold at least a
   struct icmphdr; for protocol 1 (ICMP) the ICMP type is examined in a
   switch; a packet with a non-zero fragment offset (a non-initial
   fragment) is refused; and a source address that is zero, in
   127.0.0.0/8, multicast (224.0.0.0/4) or Class E (240.0.0.0/4) is
   refused.  Each refusal returns False.  What the individual switch
   cases decide, and the final return, are not shown in this excerpt.
   As the remarks in the body admit, broadcast source and destination
   addresses are not detected. */
311 static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
314 struct icmphdr *icmph;
317 if (buf->size < (int)sizeof(struct icmphdr)) return False;
318 iph=(struct iphdr *)buf->start;
319 icmph=(struct icmphdr *)buf->start;
320 if (iph->protocol==1) {
321 switch(icmph->type) {
322 /* Based on http://www.iana.org/assignments/icmp-parameters/icmp-parameters.xhtml#icmp-parameters-types
323 * as retrieved Thu, 20 Mar 2014 00:16:44 +0000.
324 * Deprecated, reserved, unassigned and experimental
325 * options are treated as not safe to reply to.
327 case 0: /* Echo Reply */
329 case 13: /* Timestamp */
330 case 14: /* Timestamp Reply */
336 /* How do we spot broadcast destination addresses? */
337 if (ntohs(iph->frag)&IPHDR_FRAG_OFF) return False;
338 source=ntohl(iph->saddr);
339 if (source==0) return False;
340 if ((source&0xff000000)==0x7f000000) return False; /* 127.0.0.0/8 */
341 /* How do we spot broadcast source addresses? */
342 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
343 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
347 /* How much of the original IP packet do we include in its ICMP
348 response? The header plus up to 64 bits. */
351 4.3.2.3 Original Message Header
353 Historically, every ICMP error message has included the Internet
354 header and at least the first 8 data bytes of the datagram that
355 triggered the error. This is no longer adequate, due to the use of
356 IP-in-IP tunneling and other technologies. Therefore, the ICMP
357 datagram SHOULD contain as much of the original datagram as possible
358 without the length of the ICMP datagram exceeding 576 bytes. The
359 returned IP header (and user data) MUST be identical to that which
360 was received, except that the router is not required to undo any
361 modifications to the IP header that are normally performed in
362 forwarding that were performed before the error was detected (e.g.,
363 decrementing the TTL, or updating options). Note that the
364 requirements of Section [4.3.3.5] supersede this requirement in some
365 cases (i.e., for a Parameter Problem message, if the problem is in a
366 modified field, the router must undo the modification). See Section
/* Number of bytes of the offending packet to quote inside an ICMP
   error.  Returns 0 if buf is too short to hold an IP header; otherwise
   the smaller of hlen and the packet's own tot_len.  The computation of
   hlen is not shown in this excerpt; the remark in the body indicates
   it is the IP header plus the first 8 bytes of payload.
   NOTE(review): the result is bounded by tot_len, not by buf->size;
   this relies on the packet having been validated beforehand (compare
   netlink_check, which requires the two to be equal). */
369 static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
371 if (buf->size < (int)sizeof(struct iphdr)) return 0;
372 struct iphdr *iph=(struct iphdr *)buf->start;
376 /* We include the first 8 bytes of the packet data, provided they exist */
378 plen=ntohs(iph->tot_len);
379 return (hlen>plen?plen:hlen);
382 /* Send an ICMP message of the given type and code, with `info' stored
383 in its d field, in response to the packet in buf.  Nothing is sent
   unless netlink_icmp_may_reply(buf) allows it.  The reply is built in
   st->icmp, addressed to the source of the offending packet, quotes the
   first netlink_icmp_reply_len(buf) bytes of that packet, and is handed
   to netlink_packet_deliver() with sender==NULL, i.e. as a packet
   generated locally.  buf is not freed in the lines shown.
   (This comment used to describe a `client' argument saying where the
   offending packet came from, with NULL meaning the host; the present
   signature has no such parameter.) */
384 static void netlink_icmp_simple(struct netlink *st, struct buffer_if *buf,
385 uint8_t type, uint8_t code,
386 union icmpinfofield info)
391 if (netlink_icmp_may_reply(buf)) {
392 struct iphdr *iph=(struct iphdr *)buf->start;
393 len=netlink_icmp_reply_len(buf);
394 h=netlink_icmp_tmpl(st,ntohl(iph->saddr),len);
395 h->type=type; h->code=code; h->d=info;
396 memcpy(buf_append(&st->icmp,len),buf->start,len);
397 netlink_icmp_csum(h);
398 netlink_packet_deliver(st,NULL,&st->icmp);
399 BUF_ASSERT_FREE(&st->icmp);
404 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
406 * RFC1812: 4.2.2.5 MUST discard messages containing invalid checksums.
408 * Is the datagram acceptable?
410 * 1. Length at least the size of an ip header
412 * 3. Checksums correctly.
413 * 4. Doesn't have a bogus length
/* Validate the IP packet in buf.  When a test fails, BAD() formats a
   short reason into errmsgbuf (bounded by errmsgbuflen, via snprintf);
   the rest of that macro and the function's return statements are not
   shown in this excerpt.  netlink_incoming treats a false result as a
   bad packet and logs errmsgbuf.
   Tests, in order: buffer at least sizeof(struct iphdr); ihl >= 5;
   version == 4; buffer at least ihl*4 bytes; header checksum sums to
   zero; tot_len equal to the buffer size (no padding allowed); tot_len
   at least ihl*4.
   NOTE(review): buf->size is printed with PRIu32 in the first BAD() but
   with PRId32 in the later ones; use whichever matches its declared
   type throughout. */
415 static bool_t netlink_check(struct netlink *st, struct buffer_if *buf,
416 char *errmsgbuf, int errmsgbuflen)
418 #define BAD(...) do{ \
419 snprintf(errmsgbuf,errmsgbuflen,__VA_ARGS__); \
423 if (buf->size < (int)sizeof(struct iphdr)) BAD("len %"PRIu32"",buf->size);
424 struct iphdr *iph=(struct iphdr *)buf->start;
427 if (iph->ihl < 5) BAD("ihl %u",iph->ihl);
428 if (iph->version != 4) BAD("version %u",iph->version);
429 if (buf->size < iph->ihl*4) BAD("size %"PRId32"<%u*4",buf->size,iph->ihl);
430 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) BAD("csum");
431 len=ntohs(iph->tot_len);
432 /* There should be no padding */
433 if (buf->size!=len) BAD("len %"PRId32"!=%"PRId32,buf->size,len);
434 if (len<(iph->ihl<<2)) BAD("len %"PRId32"<(%u<<2)",len,iph->ihl);
435 /* XXX check that there's no source route specified */
/* Rewrite, in place, the IPv4 header at base so that it keeps only the
   options that have to appear in every fragment.  The options after the
   fixed header are walked in order: End of Options List (0x00) stops
   the walk, No Operation (0x01) is dropped, and any other option is
   kept -- moved down with memmove -- only if its copy bit (0x80) is
   set.  The shortened header is then padded with End of Option List
   bytes up to a multiple of 4 and ihl is updated to match.
   Returns a static error string if the option area is truncated.  The
   success return and the update of *hlp are not shown in this excerpt
   (presumably NULL and the new header length -- confirm). */
441 static const char *fragment_filter_header(uint8_t *base, long *hlp)
443 const int fixedhl = sizeof(struct iphdr);
445 const uint8_t *ipend = base + hl;
446 uint8_t *op = base + fixedhl;
447 const uint8_t *ip = op;
451 int remain = ipend - ip;
452 if (opt == 0x00) /* End of Options List */ break;
453 if (opt == 0x01) /* No Operation */ continue;
454 if (remain < 2) return "IPv4 options truncated at length";
456 if (remain < optlen) return "IPv4 options truncated in option";
457 if (opt & 0x80) /* copy */ {
458 memmove(op, ip, optlen);
463 while ((hl = (op - base)) & 0x3)
464 *op++ = 0x00 /* End of Option List */;
465 ((struct iphdr*)base)->ihl = hl >> 2;
471 /* Fragment or send ICMP Fragmentation Needed */
/* Deliver buf through deliver(deliver_dst, buf), splitting it into
   fragments first if it is larger than mtu.
   - A packet that fits within mtu is delivered unchanged.
   - If the packet has Don't Fragment set, an ICMP Unreachable /
     Fragmentation Required message carrying the mtu is generated
     instead.
   - Otherwise the original is copied into st->icmp (borrowed as scratch
     space) and re-emitted through buf as a series of fragments.  Each
     fragment takes the current header plus as much payload as fits;
     when more payload remains, the amount is rounded down to a multiple
     of 8 bytes.  More Fragments is set on every fragment but the last,
     the original's other flag bits are kept, offsets are added to any
     offset the original already carried, and tot_len and the header
     checksum are rewritten per fragment.  After the first fragment the
     header is passed through fragment_filter_header() so that later
     fragments carry only the options that must be copied.
   BADFRAG() reports a failure, naming the source and delivery_name; the
   rest of that macro, two parameters of this function (deliver_dst and
   mtu are used below but their declarations fall on lines missing from
   this excerpt) and the buffer clean-up paths are not shown.  In the
   lines shown, source is used only to build log text. */
472 static void netlink_maybe_fragment(struct netlink *st,
473 netlink_deliver_fn *deliver,
475 const char *delivery_name,
477 uint32_t source, uint32_t dest,
478 struct buffer_if *buf)
480 struct iphdr *iph=(struct iphdr*)buf->start;
481 long hl = iph->ihl*4;
482 const char *ssource = ipaddr_to_string(source);
484 if (buf->size <= mtu) {
485 deliver(deliver_dst, buf);
489 MDEBUG("%s: fragmenting %s->%s org.size=%"PRId32"\n",
490 st->name, ssource, delivery_name, buf->size);
492 #define BADFRAG(m, ...) \
494 "%s: fragmenting packet from source %s" \
495 " for transmission via %s: " m "\n", \
496 st->name, ssource, delivery_name, \
499 unsigned orig_frag = ntohs(iph->frag);
501 if (orig_frag&IPHDR_FRAG_DONT) {
502 union icmpinfofield info =
503 { .fragneeded = { .unused = 0, .mtu = htons(mtu) } };
504 netlink_icmp_simple(st,buf,
505 ICMP_TYPE_UNREACHABLE,
506 ICMP_CODE_FRAGMENTATION_REQUIRED,
512 BADFRAG("mtu %"PRId32" too small", mtu);
517 /* we (ab)use the icmp buffer to stash the original packet */
518 struct buffer_if *orig = &st->icmp;
519 BUF_ALLOC(orig,"netlink_client_deliver fragment orig");
520 buffer_copy(orig,buf);
523 const uint8_t *startindata = orig->start + hl;
524 const uint8_t *indata = startindata;
525 const uint8_t *endindata = orig->start + orig->size;
529 /* compute our fragment offset */
530 long dataoffset = indata - startindata
531 + (orig_frag & IPHDR_FRAG_OFF)*8;
532 assert(!(dataoffset & 7));
533 if (dataoffset > IPHDR_FRAG_OFF*8) {
534 BADFRAG("ultimate fragment offset out of range");
538 BUF_ALLOC(buf,"netlink_client_deliver fragment frag");
539 buffer_init(buf,calculate_max_start_pad());
541 /* copy header (possibly filtered); will adjust in a bit */
542 struct iphdr *fragh = buf_append(buf, hl);
543 memcpy(fragh, orig->start, hl);
545 /* decide how much payload to copy and copy it */
546 long avail = mtu - hl;
547 long remain = endindata - indata;
548 long use = avail < remain ? (avail & ~(long)7) : remain;
549 memcpy(buf_append(buf, use), indata, use);
552 _Bool last_frag = indata >= endindata;
554 /* adjust the header */
555 fragh->tot_len = htons(buf->size);
557 htons((orig_frag & ~IPHDR_FRAG_OFF) |
558 (last_frag ? 0 : IPHDR_FRAG_MORE) |
561 fragh->check = ip_fast_csum((const void*)fragh, fragh->ihl);
563 /* actually send it */
564 deliver(deliver_dst, buf);
568 /* after copying the header for the first frag,
569 * we filter the header for the remaining frags */
571 const char *bad = fragment_filter_header(orig->start, &hl);
572 if (bad) { BADFRAG("%s", bad); break; }
581 /* Deliver a packet _to_ client; used after we have decided
582 * what to do with it (and just to check that the client has
583 * actually registered a delivery function with us). */
/* If the client has no deliver function, an M_ERR message naming source
   and dest is logged and the packet is dropped; otherwise the packet
   goes through netlink_maybe_fragment() with the client's own deliver
   callback, dst, name and mtu.  source and dest are host-order
   addresses.  How buf is released on the drop path is not shown in this
   excerpt. */
584 static void netlink_client_deliver(struct netlink *st,
585 struct netlink_client *client,
586 uint32_t source, uint32_t dest,
587 struct buffer_if *buf)
589 if (!client->deliver) {
591 s=ipaddr_to_string(source);
592 d=ipaddr_to_string(dest);
593 Message(M_ERR,"%s: dropping %s->%s, client not registered\n",
599 netlink_maybe_fragment(st, client->deliver,client->dst,client->name,
600 client->mtu, source,dest,buf);
604 /* Deliver a packet to the host; used after we have decided that that
605 * is what to do with it. */
/* Goes through netlink_maybe_fragment() with st->deliver_to_host and
   st->dst as the delivery callback and its argument, the netlink's own
   mtu, and "(host)" as the name shown in fragmentation messages. */
606 static void netlink_host_deliver(struct netlink *st,
607 uint32_t source, uint32_t dest,
608 struct buffer_if *buf)
610 netlink_maybe_fragment(st, st->deliver_to_host,st->dst,"(host)",
611 st->mtu, source,dest,buf);
615 /* Deliver a packet. "sender"==NULL for packets from the host and packets
616 generated internally in secnet. */
/* Routing decision for one packet.  Outline of the logic visible here:
   - a packet shorter than an IP header, or one addressed to
     st->secnet_address, is reported with M_ERR;
   - packets with sender==NULL, and packets from a client that has
     OPT_ALLOWROUTE, satisfy the allow_route test and may use any route;
   - the routes array is scanned for routes that are up and whose
     networks contain dest, preferring a higher link_quality; once a
     route with OPT_ALLOWROUTE has been found for a restricted sender,
     routes without that option are no longer considered; the scan stops
     early on a usable route of MAXIMUM_LINK_QUALITY;
   - if no tunnel matches, the packet goes to the host when dest is in
     st->networks, otherwise it is logged and answered with ICMP network
     unreachable;
   - if the best route may not be used by this sender, an M_NOTICE is
     logged and ICMP network-prohibited is sent;
   - otherwise the packet goes to netlink_client_deliver() when
     best_quality > 0, and is answered with ICMP network unreachable
     when it is not.
   buf must be in use on entry (BUF_ASSERT_USED) and is asserted free
   after each hand-off.  Several statements are missing from this
   excerpt, so details such as how best_match and best_quality are
   initialised and updated must be read from the full file. */
617 static void netlink_packet_deliver(struct netlink *st,
618 struct netlink_client *sender,
619 struct buffer_if *buf)
621 if (buf->size < (int)sizeof(struct iphdr)) {
622 Message(M_ERR,"%s: trying to deliver a too-short packet"
623 " from %s!\n",st->name, sender_name(sender));
628 struct iphdr *iph=(struct iphdr *)buf->start;
629 uint32_t dest=ntohl(iph->daddr);
630 uint32_t source=ntohl(iph->saddr);
631 uint32_t best_quality;
632 bool_t allow_route=False;
633 bool_t found_allowed=False;
637 BUF_ASSERT_USED(buf);
639 if (dest==st->secnet_address) {
640 Message(M_ERR,"%s: trying to deliver a packet to myself!\n",st->name);
645 /* Packets from the host (sender==NULL) may always be routed. Packets
646 from clients with the allow_route option will also be routed. */
647 if (!sender || (sender && (sender->options & OPT_ALLOWROUTE)))
650 /* If !allow_route, we check the routing table anyway, and if
651 there's a suitable route with OPT_ALLOWROUTE set we use it. If
652 there's a suitable route, but none with OPT_ALLOWROUTE set then
653 we generate ICMP 'communication with destination network
654 administratively prohibited'. */
658 for (i=0; i<st->n_clients; i++) {
659 if (st->routes[i]->up &&
660 ipset_contains_addr(st->routes[i]->networks,dest)) {
661 /* It's an available route to the correct destination. But is
662 it better than the one we already have? */
664 /* If we have already found an allowed route then we don't
665 bother looking at routes we're not allowed to use. If
666 we don't yet have an allowed route we'll consider any. */
667 if (!allow_route && found_allowed) {
668 if (!(st->routes[i]->options&OPT_ALLOWROUTE)) continue;
671 if (st->routes[i]->link_quality>best_quality
672 || best_quality==0) {
673 best_quality=st->routes[i]->link_quality;
675 if (st->routes[i]->options&OPT_ALLOWROUTE)
677 /* If quality isn't perfect we may wish to
678 consider kicking the tunnel with a 0-length
679 packet to prompt it to perform a key setup.
680 Then it'll eventually decide it's up or
682 /* If quality is perfect and we're allowed to use the
683 route we don't need to search any more. */
684 if (best_quality>=MAXIMUM_LINK_QUALITY &&
685 (allow_route || found_allowed)) break;
689 if (best_match==-1) {
690 /* The packet's not going down a tunnel. It might (ought to)
692 if (ipset_contains_addr(st->networks,dest)) {
693 netlink_host_deliver(st,source,dest,buf);
694 BUF_ASSERT_FREE(buf);
697 s=ipaddr_to_string(source);
698 d=ipaddr_to_string(dest);
699 Message(M_DEBUG,"%s: don't know where to deliver packet "
700 "(s=%s, d=%s)\n", st->name, s, d);
702 netlink_icmp_simple(st,buf,ICMP_TYPE_UNREACHABLE,
703 ICMP_CODE_NET_UNREACHABLE, icmp_noinfo);
708 !(st->routes[best_match]->options&OPT_ALLOWROUTE)) {
710 s=ipaddr_to_string(source);
711 d=ipaddr_to_string(dest);
712 /* We have a usable route but aren't allowed to use it.
713 Generate ICMP destination unreachable: communication
714 with destination network administratively prohibited */
715 Message(M_NOTICE,"%s: denied forwarding for packet (s=%s, d=%s)\n",
719 netlink_icmp_simple(st,buf,ICMP_TYPE_UNREACHABLE,
720 ICMP_CODE_NET_PROHIBITED, icmp_noinfo);
723 if (best_quality>0) {
724 netlink_client_deliver(st,st->routes[best_match],
726 BUF_ASSERT_FREE(buf);
728 /* Generate ICMP destination unreachable */
729 netlink_icmp_simple(st,buf,
730 ICMP_TYPE_UNREACHABLE,
731 ICMP_CODE_NET_UNREACHABLE,
737 BUF_ASSERT_FREE(buf);
/* Forward a packet that is not addressed to this router.  In the lines
   shown, one path answers with ICMP Time Exceeded (TTL exceeded); the
   other recomputes the IP header checksum and passes the packet on to
   netlink_packet_deliver().  The condition choosing between them and
   the header modification that makes the new checksum necessary are on
   lines missing from this excerpt (presumably the TTL test and
   decrement -- confirm).  buf must be in use on entry and is asserted
   free after delivery.
   NOTE(review): the early return for a too-short buffer leaves buf
   unfreed, while netlink_incoming asserts it free afterwards.  The
   packet has already passed netlink_check, so the branch should be
   unreachable -- confirm. */
740 static void netlink_packet_forward(struct netlink *st,
741 struct netlink_client *sender,
742 struct buffer_if *buf)
744 if (buf->size < (int)sizeof(struct iphdr)) return;
745 struct iphdr *iph=(struct iphdr *)buf->start;
747 BUF_ASSERT_USED(buf);
749 /* Packet has already been checked */
751 /* Generate ICMP time exceeded */
752 netlink_icmp_simple(st,buf,ICMP_TYPE_TIME_EXCEEDED,
753 ICMP_CODE_TTL_EXCEEDED,icmp_noinfo);
759 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
761 netlink_packet_deliver(st,sender,buf);
762 BUF_ASSERT_FREE(buf);
765 /* Deal with packets addressed explicitly to us */
/* Cases visible here:
   - a buffer shorter than struct icmphdr is ignored with a warning;
   - a fragment (non-zero offset or More Fragments set) is ignored, and
     the warning is logged only for the first fragment (offset zero);
   - an ICMP echo request with code 0 is answered in place: the type
     becomes echo reply, the old source becomes the destination, the
     source becomes st->secnet_address, both the IP and ICMP checksums
     are recomputed, and the buffer is delivered with sender==NULL;
   - other ICMP is logged as "unknown incoming ICMP";
   - an ICMP protocol-unreachable reply is sent on a path whose
     condition is not shown in this excerpt (presumably for protocols
     other than ICMP -- confirm).
   How buf is released on the ignore paths is not shown either. */
766 static void netlink_packet_local(struct netlink *st,
767 struct netlink_client *sender,
768 struct buffer_if *buf)
774 if (buf->size < (int)sizeof(struct icmphdr)) {
775 Message(M_WARNING,"%s: short packet addressed to secnet; "
776 "ignoring it\n",st->name);
780 h=(struct icmphdr *)buf->start;
782 unsigned fraginfo = ntohs(h->iph.frag);
783 if ((fraginfo&(IPHDR_FRAG_OFF|IPHDR_FRAG_MORE))!=0) {
784 if (!(fraginfo & IPHDR_FRAG_OFF))
785 /* report only for first fragment */
786 Message(M_WARNING,"%s: fragmented packet addressed to secnet; "
787 "ignoring it\n",st->name);
792 if (h->iph.protocol==1) {
794 if (h->type==ICMP_TYPE_ECHO_REQUEST && h->code==0) {
795 /* ICMP echo-request. Special case: we re-use the buffer
796 to construct the reply. */
797 h->type=ICMP_TYPE_ECHO_REPLY;
798 h->iph.daddr=h->iph.saddr;
799 h->iph.saddr=htonl(st->secnet_address);
802 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
803 netlink_icmp_csum(h);
804 netlink_packet_deliver(st,NULL,buf);
807 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
809 /* Send ICMP protocol unreachable */
810 netlink_icmp_simple(st,buf,ICMP_TYPE_UNREACHABLE,
811 ICMP_CODE_PROTOCOL_UNREACHABLE,icmp_noinfo);
819 /* If sender==NULL packet is from host, otherwise sender specifies which tunnel
/* Common entry point for every packet, whether it arrives from a tunnel
   (sender != NULL) or from the host (sender == NULL).  Steps visible
   here:
   1. netlink_check(); a failure is logged at M_WARNING with the reason
      text it produced.
   2. Source-address check: a tunnel packet must have its source inside
      sender->networks, a host packet inside st->networks.  Offenders
      are logged at M_WARNING and, as the body explains, no ICMP is
      generated for them.
   3. Point-to-point devices hand the packet straight to the host or to
      the single client (st->clients) without examining dest.
   4. Otherwise a packet whose dest is st->secnet_address goes to
      netlink_packet_local(), anything else to netlink_packet_forward().
   buf must be in use on entry and is asserted free after each hand-off.
   The conditions separating these steps, and the clean-up on the reject
   paths, are on lines missing from this excerpt. */
821 static void netlink_incoming(struct netlink *st, struct netlink_client *sender,
822 struct buffer_if *buf)
824 uint32_t source,dest;
827 const char *sourcedesc=sender?sender->name:"host";
829 BUF_ASSERT_USED(buf);
831 if (!netlink_check(st,buf,errmsgbuf,sizeof(errmsgbuf))) {
832 Message(M_WARNING,"%s: bad IP packet from %s: %s\n",
838 assert(buf->size >= (int)sizeof(struct iphdr));
839 iph=(struct iphdr *)buf->start;
841 source=ntohl(iph->saddr);
842 dest=ntohl(iph->daddr);
844 /* Check source. If we don't like the source, there's no point
845 generating ICMP because we won't know how to get it to the
846 source of the packet. */
848 /* Check that the packet source is appropriate for the tunnel
850 if (!ipset_contains_addr(sender->networks,source)) {
852 s=ipaddr_to_string(source);
853 d=ipaddr_to_string(dest);
854 Message(M_WARNING,"%s: packet from tunnel %s with bad "
855 "source address (s=%s,d=%s)\n",st->name,sender->name,s,d);
861 /* Check that the packet originates in our configured local
862 network, and hasn't been forwarded from elsewhere or
863 generated with the wrong source address */
864 if (!ipset_contains_addr(st->networks,source)) {
866 s=ipaddr_to_string(source);
867 d=ipaddr_to_string(dest);
868 Message(M_WARNING,"%s: outgoing packet with bad source address "
869 "(s=%s,d=%s)\n",st->name,s,d);
876 /* If this is a point-to-point device we don't examine the
877 destination address at all; we blindly send it down our
878 one-and-only registered tunnel, or to the host, depending on
879 where it came from. It's up to external software to check
880 address validity and generate ICMP, etc. */
883 netlink_host_deliver(st,source,dest,buf);
885 netlink_client_deliver(st,st->clients,source,dest,buf);
887 BUF_ASSERT_FREE(buf);
891 /* st->secnet_address needs checking before matching destination
893 if (dest==st->secnet_address) {
894 netlink_packet_local(st,sender,buf);
895 BUF_ASSERT_FREE(buf);
898 netlink_packet_forward(st,sender,buf);
899 BUF_ASSERT_FREE(buf);
/* Packet-delivery callback installed as c->ops.deliver for each client:
   sst is the struct netlink_client, and the packet is passed to
   netlink_incoming() with that client as the sender. */
902 static void netlink_inst_incoming(void *sst, struct buffer_if *buf)
904 struct netlink_client *c=sst;
905 struct netlink *st=c->nst;
907 netlink_incoming(st,c,buf);
/* Packet-delivery callback returned by netlink_init() to its caller:
   sst is the struct netlink, and the packet is passed to
   netlink_incoming() with sender==NULL, which marks it as coming from
   the host. */
910 static void netlink_dev_incoming(void *sst, struct buffer_if *buf)
912 struct netlink *st=sst;
914 netlink_incoming(st,NULL,buf);
/* Callback installed as c->ops.set_quality: records the new link
   quality for client sst and derives its up flag (down only when the
   quality equals LINK_QUALITY_DOWN).  For clients with OPT_SOFTROUTE
   the device's set_routes callback is then invoked for this client. */
917 static void netlink_set_quality(void *sst, uint32_t quality)
919 struct netlink_client *c=sst;
920 struct netlink *st=c->nst;
922 c->link_quality=quality;
923 c->up=(c->link_quality==LINK_QUALITY_DOWN)?False:True;
924 if (c->options&OPT_SOFTROUTE) {
925 st->set_routes(st->dst,c);
/* Log every subnet in snets at the given loglevel, each followed by a
   single space and with no newline, so the caller can finish the line.
   st is not used in the lines shown. */
929 static void netlink_output_subnets(struct netlink *st, uint32_t loglevel,
930 struct subnet_list *snets)
935 for (i=0; i<snets->entries; i++) {
936 net=subnet_to_string(snets->list[i]);
937 Message(loglevel,"%s ",net);
/* Log the routing state of st.  c holds the log level: it is M_WARNING
   when the dump was explicitly requested, and its default is set on a
   line missing from this excerpt.  Two layouts are visible: a single
   line for a point-to-point device (remote end address plus the routes
   of the one client), and a table with one line per entry of
   st->routes -- subnets, tunnel state, soft/hard, free/restricted,
   quality, use count, priority -- followed by the router's own /32 and
   the subnets that go to the host.  The condition choosing between the
   layouts, and some Message() arguments, are not shown. */
942 static void netlink_dump_routes(struct netlink *st, bool_t requested)
948 if (requested) c=M_WARNING;
950 net=ipaddr_to_string(st->secnet_address);
951 Message(c,"%s: point-to-point (remote end is %s); routes: ",
954 netlink_output_subnets(st,c,st->clients->subnets);
957 Message(c,"%s: routing table:\n",st->name);
958 for (i=0; i<st->n_clients; i++) {
959 netlink_output_subnets(st,c,st->routes[i]->subnets);
960 Message(c,"-> tunnel %s (%s,mtu %d,%s routes,%s,"
961 "quality %d,use %d,pri %lu)\n",
963 st->routes[i]->up?"up":"down",
965 st->routes[i]->options&OPT_SOFTROUTE?"soft":"hard",
966 st->routes[i]->options&OPT_ALLOWROUTE?"free":"restricted",
967 st->routes[i]->link_quality,
968 st->routes[i]->outcount,
969 (unsigned long)st->routes[i]->priority);
971 net=ipaddr_to_string(st->secnet_address);
972 Message(c,"%s/32 -> netlink \"%s\" (use %d)\n",
973 net,st->name,st->localcount);
975 for (i=0; i<st->subnets->entries; i++) {
976 net=subnet_to_string(st->subnets->list[i]);
977 Message(c,"%s ",net);
981 Message(c,"-> host (use %d)\n",st->outcount);
985 /* ap is a pointer to a member of the routes array */
/* qsort() comparator over pointers to struct netlink_client pointers.
   Returns 0 for equal priorities and 1 when *a has the lower priority,
   so higher-priority clients sort first.  The remaining return is not
   shown in this excerpt (presumably -1). */
986 static int netlink_compare_client_priority(const void *ap, const void *bp)
988 const struct netlink_client *const*a=ap;
989 const struct netlink_client *const*b=bp;
991 if ((*a)->priority==(*b)->priority) return 0;
992 if ((*a)->priority<(*b)->priority) return 1;
/* Hook registered by netlink_init() for PHASE_SETUP.  Builds st->routes:
   an array with one slot per client (st->n_clients entries), filled by
   walking the st->clients list, then sorted with
   netlink_compare_client_priority and finally dumped through
   netlink_dump_routes(st,False).  The statements that store each client
   into the array are not shown in this excerpt; new_phase is not used
   in the lines shown. */
996 static void netlink_phase_hook(void *sst, uint32_t new_phase)
998 struct netlink *st=sst;
999 struct netlink_client *c;
1002 /* All the networks serviced by the various tunnels should now
1003 * have been registered. We build a routing table by sorting the
1004 * clients by priority. */
1005 st->routes=safe_malloc_ary(sizeof(*st->routes),st->n_clients,
1006 "netlink_phase_hook");
1007 /* Fill the table */
1009 for (c=st->clients; c; c=c->next) {
1013 /* Sort the table in descending order of priority */
1014 qsort(st->routes,st->n_clients,sizeof(*st->routes),
1015 netlink_compare_client_priority);
1017 netlink_dump_routes(st,False);
/* Callback registered with request_signal_notification() for SIGUSR1:
   logs that a dump was requested and then dumps the routing table with
   requested=True.  signum is not used in the lines shown. */
1020 static void netlink_signal_handler(void *sst, int signum)
1022 struct netlink *st=sst;
1023 Message(M_INFO,"%s: route dump requested by SIGUSR1\n",st->name);
1024 netlink_dump_routes(st,True);
/* Callback installed as c->ops.set_mtu; sst is the struct
   netlink_client.  The statement that uses new_mtu is not shown in this
   excerpt (presumably it updates the client's mtu -- confirm). */
1027 static void netlink_inst_set_mtu(void *sst, int32_t new_mtu)
1029 struct netlink_client *c=sst;
/* Callback installed as c->ops.reg; sst is the struct netlink_client.
   Reports the netlink's own mtu back through *localmtu_r.  The
   statements that use deliver and dst are not shown in this excerpt
   (presumably they are stored in the client, which
   netlink_client_deliver later reads as client->deliver and
   client->dst -- confirm). */
1034 static void netlink_inst_reg(void *sst, netlink_deliver_fn *deliver,
1035 void *dst, uint32_t *localmtu_r)
1037 struct netlink_client *c=sst;
1038 struct netlink *st=c->nst;
1044 *localmtu_r=st->mtu;
/* Maps the option names accepted in a client's "options" list to their
   flag bits; netlink_inst_create() passes this table to
   string_list_to_word().  The end of the table is not shown in this
   excerpt. */
1047 static struct flagstr netlink_option_table[]={
1048 { "soft", OPT_SOFTROUTE },
1049 { "allow-route", OPT_ALLOWROUTE },
1052 /* This is the routine that gets called when the closure that's
1053 returned by an invocation of a netlink device closure (eg. tun,
1054 userv-ipif) is invoked. It's used to create routes and pass in
1055 information about them; the closure it returns is used by site
/* Create one client of netlink st from the configuration dictionary.
   Keys read: "name", "routes" (its absence is fatal), "options"
   (decoded through netlink_option_table), "priority" and "mtu" (both
   default to 0; an mtu of 0 means the netlink's own mtu is used).
   Checks visible here: the "soft" option needs a device that provides
   set_routes, and it marks root privileges as required; the routes must
   pass the ipset_is_subset() test against st->remote_networks.  One
   cfgfatal about point-to-point netlinks sits on a path whose condition
   is not shown in this excerpt.
   The new client gets its closure and ops table filled in, starts with
   link_quality LINK_QUALITY_UNUSED, and is linked in front of the
   existing st->clients list.  The return statement is not shown;
   callers use the result as the client's closure. */
1057 static closure_t *netlink_inst_create(struct netlink *st,
1058 struct cloc loc, dict_t *dict)
1060 struct netlink_client *c;
1062 struct ipset *networks;
1063 uint32_t options,priority;
1067 name=dict_read_string(dict, "name", True, st->name, loc);
1069 l=dict_lookup(dict,"routes");
1071 cfgfatal(loc,st->name,"required parameter \"routes\" not found\n");
1072 networks=string_list_to_ipset(l,loc,st->name,"routes");
1073 options=string_list_to_word(dict_lookup(dict,"options"),
1074 netlink_option_table,st->name);
1076 priority=dict_read_number(dict,"priority",False,st->name,loc,0);
1077 mtu=dict_read_number(dict,"mtu",False,st->name,loc,0);
1079 if ((options&OPT_SOFTROUTE) && !st->set_routes) {
1080 cfgfatal(loc,st->name,"this netlink device does not support "
1085 if (options&OPT_SOFTROUTE) {
1086 /* XXX for now we assume that soft routes require root privilege;
1087 this may not always be true. The device driver can tell us. */
1088 require_root_privileges=True;
1089 require_root_privileges_explanation="netlink: soft routes";
1091 cfgfatal(loc,st->name,"point-to-point netlinks do not support "
1097 /* Check that nets are a subset of st->remote_networks;
1098 refuse to register if they are not. */
1099 if (!ipset_is_subset(st->remote_networks,networks)) {
1100 cfgfatal(loc,st->name,"routes are not allowed\n");
1104 c=safe_malloc(sizeof(*c),"netlink_inst_create");
1105 c->cl.description=name;
1106 c->cl.type=CL_NETLINK;
1108 c->cl.interface=&c->ops;
1110 c->ops.reg=netlink_inst_reg;
1111 c->ops.deliver=netlink_inst_incoming;
1112 c->ops.set_quality=netlink_set_quality;
1113 c->ops.set_mtu=netlink_inst_set_mtu;
1116 c->networks=networks;
1117 c->subnets=ipset_to_subnet_list(networks);
1118 c->priority=priority;
1122 c->link_quality=LINK_QUALITY_UNUSED;
1123 c->mtu=mtu?mtu:st->mtu;
1128 c->next=st->clients;
1130 assert(st->n_clients < INT_MAX);
/* Closure apply function of a netlink device (installed as
   st->cl.apply by netlink_init).  Its first argument must be a
   dictionary, otherwise the configuration error is fatal; it creates a
   client from that dictionary with netlink_inst_create() and returns
   the client's closure wrapped by new_closure().  context is not used
   in the lines shown. */
1136 static list_t *netlink_inst_apply(closure_t *self, struct cloc loc,
1137 dict_t *context, list_t *args)
1139 struct netlink *st=self->interface;
1145 item=list_elem(args,0);
1146 if (!item || item->type!=t_dict) {
1147 cfgfatal(loc,st->name,"must have a dictionary argument\n");
1149 dict=item->data.dict;
1151 cl=netlink_inst_create(st,loc,dict);
1153 return new_closure(cl);
/* Initialise the netlink st on behalf of a device driver.
   dst is the driver's own state, handed back to set_routes and to_host;
   description names the device type and is the default for "name".
   Configuration keys read from dict: "name", "networks",
   "remote-networks", "secnet-address", "ptp-address" and "mtu".
   - For "networks" and "remote-networks", one branch parses the given
     list; the other stores the complement of a set called `empty'
     (presumably used when the key is absent, meaning "everything" --
     confirm).
   - There is a fatal error for giving both secnet-address and
     ptp-address, and another for giving neither.
   - mtu defaults to DEFAULT_MTU; the icmp scratch buffer is sized to
     the larger of ICMP_BUFSIZE and that mtu.
   Also registers netlink_phase_hook for PHASE_SETUP and
   netlink_signal_handler for SIGUSR1.  For a point-to-point device the
   single client is created immediately, as the last comment in the
   body describes.
   Returns netlink_dev_incoming, the function the driver calls with
   packets arriving from the host side.  Several conditions and
   assignments are on lines missing from this excerpt. */
1156 netlink_deliver_fn *netlink_init(struct netlink *st,
1157 void *dst, struct cloc loc,
1158 dict_t *dict, cstring_t description,
1159 netlink_route_fn *set_routes,
1160 netlink_deliver_fn *to_host)
1166 st->cl.description=description;
1167 st->cl.type=CL_PURE;
1168 st->cl.apply=netlink_inst_apply;
1169 st->cl.interface=st;
1173 st->set_routes=set_routes;
1174 st->deliver_to_host=to_host;
1176 st->name=dict_read_string(dict,"name",False,description,loc);
1177 if (!st->name) st->name=description;
1178 l=dict_lookup(dict,"networks");
1180 st->networks=string_list_to_ipset(l,loc,st->name,"networks");
1182 struct ipset *empty;
1184 st->networks=ipset_complement(empty);
1187 l=dict_lookup(dict,"remote-networks");
1189 st->remote_networks=string_list_to_ipset(l,loc,st->name,
1192 struct ipset *empty;
1194 st->remote_networks=ipset_complement(empty);
1198 sa=dict_find_item(dict,"secnet-address",False,"netlink",loc);
1199 ptpa=dict_find_item(dict,"ptp-address",False,"netlink",loc);
1201 cfgfatal(loc,st->name,"you may not specify secnet-address and "
1202 "ptp-address in the same netlink device\n");
1204 if (!(sa || ptpa)) {
1205 cfgfatal(loc,st->name,"you must specify secnet-address or "
1206 "ptp-address for this netlink device\n");
1209 st->secnet_address=string_item_to_ipaddr(sa,"netlink");
1212 st->secnet_address=string_item_to_ipaddr(ptpa,"netlink");
1215 /* To be strictly correct we could subtract secnet_address from
1216 networks here. It shouldn't make any practical difference,
1217 though, and will make the route dump look complicated... */
1218 st->subnets=ipset_to_subnet_list(st->networks);
1219 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
1220 buffer_new(&st->icmp,MAX(ICMP_BUFSIZE,st->mtu));
1224 add_hook(PHASE_SETUP,netlink_phase_hook,st);
1225 request_signal_notification(SIGUSR1, netlink_signal_handler, st);
1227 /* If we're point-to-point then we return a CL_NETLINK directly,
1228 rather than a CL_NETLINK_OLD or pure closure (depending on
1229 compatibility). This CL_NETLINK is for our one and only
1230 client. Our cl.apply function is NULL. */
1233 cl=netlink_inst_create(st,loc,dict);
1236 return netlink_dev_incoming;
1239 /* No connection to the kernel at all... */
/* set_routes callback of the null netlink.  When the desired state of a
   tunnel (routes->up) differs from the recorded kernel state
   (routes->kup) it logs the transition at M_INFO and updates kup.  No
   other action is visible in the lines shown.  The return statements
   are not shown in this excerpt. */
1245 static bool_t null_set_route(void *sst, struct netlink_client *routes)
1247 struct null *st=sst;
1249 if (routes->up!=routes->kup) {
1250 Message(M_INFO,"%s: setting routes for tunnel %s to state %s\n",
1251 st->nl.name,routes->name,
1252 routes->up?"up":"down");
1253 routes->kup=routes->up;
/* Host-delivery callback of the null netlink.  Only the signature is
   visible in this excerpt; what the body does with buf is not shown. */
1259 static void null_deliver(void *sst, struct buffer_if *buf)
/* Closure apply function behind the "null-netlink" configuration name.
   Allocates a struct null, requires the first argument to be a
   dictionary (fatal configuration error otherwise), initialises the
   embedded netlink from it with null_set_route as the route callback,
   and returns the netlink's closure.  The rest of the parameter list
   and the final netlink_init() argument are on lines missing from this
   excerpt. */
1264 static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
1271 st=safe_malloc(sizeof(*st),"null_apply");
1273 item=list_elem(args,0);
1274 if (!item || item->type!=t_dict)
1275 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
1277 dict=item->data.dict;
1279 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_set_route,
1282 return new_closure(&st->nl.cl);
/* Module entry point: registers the "null-netlink" closure, implemented
   by null_apply, in the given dictionary. */
1285 void netlink_module(dict_t *dict)
1287 add_closure(dict,"null-netlink",null_apply);