1 /* User-kernel network link */
3 /* See RFCs 791, 792, 1123 and 1812 */
5 /* The netlink device is actually a router. Tunnels are unnumbered
6 point-to-point lines (RFC1812 section 2.2.7); the router has a
7 single address (the 'router-id'). */
9 /* This is where we currently have the anti-spoofing paranoia - before
10 sending a packet to the kernel we check that the tunnel it came
11 over could reasonably have produced it. */
14 /* Points to note from RFC1812 (which may require changes in this
17 3.3.4 Maximum Transmission Unit - MTU
19 The MTU of each logical interface MUST be configurable within the
20 range of legal MTUs for the interface.
22 Many Link Layer protocols define a maximum frame size that may be
23 sent. In such cases, a router MUST NOT allow an MTU to be set which
24 would allow sending of frames larger than those allowed by the Link
25 Layer protocol. However, a router SHOULD be willing to receive a
26 packet as large as the maximum frame size even if that is larger than
29 4.2.1 A router SHOULD count datagrams discarded.
31 4.2.2.1 Source route options - we probably should implement processing
32 of source routes, even though mostly the security policy will prevent
35 5.3.13.4 Source Route Options
37 A router MUST implement support for source route options in forwarded
38 packets. A router MAY implement a configuration option that, when
39 enabled, causes all source-routed packets to be discarded. However,
40 such an option MUST NOT be enabled by default.
42 5.3.13.5 Record Route Option
44 Routers MUST support the Record Route option in forwarded packets.
46 A router MAY provide a configuration option that, if enabled, will
47 cause the router to ignore (i.e., pass through unchanged) Record
48 Route options in forwarded packets. If provided, such an option MUST
49 default to enabling the record-route. This option should not affect
50 the processing of Record Route options in datagrams received by the
51 router itself (in particular, Record Route options in ICMP echo
52 requests will still be processed according to Section [4.3.3.6]).
54 5.3.13.6 Timestamp Option
56 Routers MUST support the timestamp option in forwarded packets. A
57 timestamp value MUST follow the rules given [INTRO:2].
59 If the flags field = 3 (timestamp and prespecified address), the
60 router MUST add its timestamp if the next prespecified address
61 matches any of the router's IP addresses. It is not necessary that
62 the prespecified address be either the address of the interface on
63 which the packet arrived or the address of the interface over which
67 4.2.2.7 Fragmentation: RFC 791 Section 3.2
69 Fragmentation, as described in [INTERNET:1], MUST be supported by a
72 4.2.2.8 Reassembly: RFC 791 Section 3.2
74 As specified in the corresponding section of [INTRO:2], a router MUST
75 support reassembly of datagrams that it delivers to itself.
77 4.2.2.9 Time to Live: RFC 791 Section 3.2
79 Note in particular that a router MUST NOT check the TTL of a packet
80 except when forwarding it.
82 A router MUST NOT discard a datagram just because it was received
83 with TTL equal to zero or one; if it is to the router and otherwise
84 valid, the router MUST attempt to receive it.
86 On messages the router originates, the IP layer MUST provide a means
87 for the transport layer to set the TTL field of every datagram that
88 is sent. When a fixed TTL value is used, it MUST be configurable.
91 8.1 The Simple Network Management Protocol - SNMP
92 8.1.1 SNMP Protocol Elements
94 Routers MUST be manageable by SNMP [MGT:3]. The SNMP MUST operate
95 using UDP/IP as its transport and network protocols.
110 #define MDEBUG(...) Message(M_DEBUG, __VA_ARGS__)
111 #else /* !NETLINK_DEBUG */
112 #define MDEBUG(...) ((void)0)
113 #endif /* !NETLINK_DEBUG */
/* ICMP message types, and the codes used with them, that this file
   generates or recognises.  Numeric values are those assigned by
   RFC 792 (code 13 was added by RFC 1812). */

/* Type 0: Echo Reply */
#define ICMP_TYPE_ECHO_REPLY                0

/* Type 3: Destination Unreachable, with the codes sent for it */
#define ICMP_TYPE_UNREACHABLE               3
#define ICMP_CODE_NET_UNREACHABLE           0
#define ICMP_CODE_PROTOCOL_UNREACHABLE      2
#define ICMP_CODE_FRAGMENTATION_REQUIRED    4
/* RFC 1812 calls code 13 "Communication Administratively Prohibited" */
#define ICMP_CODE_NET_PROHIBITED            13

/* Type 8: Echo Request */
#define ICMP_TYPE_ECHO_REQUEST              8

/* Type 11: Time Exceeded */
#define ICMP_TYPE_TIME_EXCEEDED             11
#define ICMP_CODE_TTL_EXCEEDED              0
128 /* Generic IP checksum routine */
129 static inline uint16_t ip_csum(const uint8_t *iph,int32_t count)
131 register uint32_t sum=0;
134 sum+=ntohs(*(uint16_t *)iph);
139 sum+=*(uint8_t *)iph;
141 sum=(sum&0xffff)+(sum>>16);
147 * This is a version of ip_compute_csum() optimized for IP headers,
148 * which always checksum on 4 octet boundaries.
150 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
153 static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl) {
156 __asm__ __volatile__(
162 "adcl 12(%1), %0 ;\n"
163 "1: adcl 16(%1), %0 ;\n"
    /* Since the input registers which are loaded with iph and ihl
       are modified, we must also specify them as outputs, or gcc
       will assume they contain their original values. */
177 : "=r" (sum), "=r" (iph), "=r" (ihl)
178 : "1" (iph), "2" (ihl)
/* Non-assembler variant of ip_fast_csum(): checksum an IP header that
 * is ihl 32-bit words long by passing ihl*4 bytes to ip_csum().
 *
 * iph is const-qualified, as it is for ip_csum() and for the inline
 * assembler variant of this function, so that a caller holding a
 * pointer to const header bytes can call either variant without
 * discarding the qualifier.  Callers passing a non-const pointer are
 * unaffected. */
static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl)
{
    /* Keep the ihl*4 multiplication below clear of signed overflow. */
    assert(ihl < INT_MAX/4);
    return ip_csum(iph,ihl*4);
}
191 #if defined (WORDS_BIGENDIAN)
202 #define IPHDR_FRAG_OFF ((uint16_t)0x1fff)
203 #define IPHDR_FRAG_MORE ((uint16_t)0x2000)
204 #define IPHDR_FRAG_DONT ((uint16_t)0x4000)
205 /* reserved 0x8000 */
211 /* The options start here. */
219 union icmpinfofield {
238 static const union icmpinfofield icmp_noinfo;
240 static void netlink_packet_deliver(struct netlink *st,
241 struct netlink_client *client,
242 struct buffer_if *buf);
244 /* XXX RFC1812 4.3.2.5:
245 All other ICMP error messages (Destination Unreachable,
246 Redirect, Time Exceeded, and Parameter Problem) SHOULD have their
247 precedence value set to 6 (INTERNETWORK CONTROL) or 7 (NETWORK
248 CONTROL). The IP Precedence value for these error messages MAY be
251 static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
252 uint32_t dest,uint16_t len)
256 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
257 buffer_init(&st->icmp,calculate_max_start_pad());
258 h=buf_append(&st->icmp,sizeof(*h));
263 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
266 h->iph.ttl=255; /* XXX should be configurable */
268 h->iph.saddr=htonl(st->secnet_address);
269 h->iph.daddr=htonl(dest);
271 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
278 /* Fill in the ICMP checksum field correctly */
279 static void netlink_icmp_csum(struct icmphdr *h)
283 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
285 h->check=ip_csum(&h->type,len);
289 * An ICMP error message MUST NOT be sent as the result of
292 * * an ICMP error message, or
294 * * a datagram destined to an IP broadcast or IP multicast
297 * * a datagram sent as a link-layer broadcast, or
299 * * a non-initial fragment, or
301 * * a datagram whose source address does not define a single
302 * host -- e.g., a zero address, a loopback address, a
303 * broadcast address, a multicast address, or a Class E
306 static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
309 struct icmphdr *icmph;
312 if (buf->size < (int)sizeof(struct icmphdr)) return False;
313 iph=(struct iphdr *)buf->start;
314 icmph=(struct icmphdr *)buf->start;
315 if (iph->protocol==1) {
316 switch(icmph->type) {
317 /* Based on http://www.iana.org/assignments/icmp-parameters/icmp-parameters.xhtml#icmp-parameters-types
318 * as retrieved Thu, 20 Mar 2014 00:16:44 +0000.
319 * Deprecated, reserved, unassigned and experimental
320 * options are treated as not safe to reply to.
322 case 0: /* Echo Reply */
324 case 13: /* Timestamp */
325 case 14: /* Timestamp Reply */
331 /* How do we spot broadcast destination addresses? */
332 if (ntohs(iph->frag)&IPHDR_FRAG_OFF) return False;
333 source=ntohl(iph->saddr);
334 if (source==0) return False;
335 if ((source&0xff000000)==0x7f000000) return False;
336 /* How do we spot broadcast source addresses? */
337 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
338 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
342 /* How much of the original IP packet do we include in its ICMP
343 response? The header plus up to 64 bits. */
346 4.3.2.3 Original Message Header
348 Historically, every ICMP error message has included the Internet
349 header and at least the first 8 data bytes of the datagram that
350 triggered the error. This is no longer adequate, due to the use of
351 IP-in-IP tunneling and other technologies. Therefore, the ICMP
352 datagram SHOULD contain as much of the original datagram as possible
353 without the length of the ICMP datagram exceeding 576 bytes. The
354 returned IP header (and user data) MUST be identical to that which
355 was received, except that the router is not required to undo any
356 modifications to the IP header that are normally performed in
357 forwarding that were performed before the error was detected (e.g.,
358 decrementing the TTL, or updating options). Note that the
359 requirements of Section [4.3.3.5] supersede this requirement in some
360 cases (i.e., for a Parameter Problem message, if the problem is in a
361 modified field, the router must undo the modification). See Section
364 static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
366 if (buf->size < (int)sizeof(struct iphdr)) return 0;
367 struct iphdr *iph=(struct iphdr *)buf->start;
371 /* We include the first 8 bytes of the packet data, provided they exist */
373 plen=ntohs(iph->tot_len);
374 return (hlen>plen?plen:hlen);
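/* Build an ICMP message of the given type and code (with info as the
   contents of its header field d) in response to the packet in buf,
   and deliver it with client==NULL, i.e. as a packet generated
   internally by secnet.  Nothing is sent if netlink_icmp_may_reply()
   says we must not reply to buf. */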
379 static void netlink_icmp_simple(struct netlink *st, struct buffer_if *buf,
380 uint8_t type, uint8_t code,
381 union icmpinfofield info)
386 if (netlink_icmp_may_reply(buf)) {
387 struct iphdr *iph=(struct iphdr *)buf->start;
388 len=netlink_icmp_reply_len(buf);
389 h=netlink_icmp_tmpl(st,ntohl(iph->saddr),len);
390 h->type=type; h->code=code; h->d=info;
391 memcpy(buf_append(&st->icmp,len),buf->start,len);
392 netlink_icmp_csum(h);
393 netlink_packet_deliver(st,NULL,&st->icmp);
394 BUF_ASSERT_FREE(&st->icmp);
399 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
401 * RFC1812: 4.2.2.5 MUST discard messages containing invalid checksums.
403 * Is the datagram acceptable?
405 * 1. Length at least the size of an ip header
407 * 3. Checksums correctly.
408 * 4. Doesn't have a bogus length
410 static bool_t netlink_check(struct netlink *st, struct buffer_if *buf,
411 char *errmsgbuf, int errmsgbuflen)
413 #define BAD(...) do{ \
414 snprintf(errmsgbuf,errmsgbuflen,__VA_ARGS__); \
418 if (buf->size < (int)sizeof(struct iphdr)) BAD("len %"PRIu32"",buf->size);
419 struct iphdr *iph=(struct iphdr *)buf->start;
422 if (iph->ihl < 5) BAD("ihl %u",iph->ihl);
423 if (iph->version != 4) BAD("version %u",iph->version);
424 if (buf->size < iph->ihl*4) BAD("size %"PRId32"<%u*4",buf->size,iph->ihl);
425 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) BAD("csum");
426 len=ntohs(iph->tot_len);
427 /* There should be no padding */
428 if (buf->size!=len) BAD("len %"PRId32"!=%"PRId32,buf->size,len);
429 if (len<(iph->ihl<<2)) BAD("len %"PRId32"<(%u<<2)",len,iph->ihl);
430 /* XXX check that there's no source route specified */
436 static const char *fragment_filter_header(uint8_t *base, long *hlp)
438 const int fixedhl = sizeof(struct iphdr);
440 const uint8_t *ipend = base + hl;
441 uint8_t *op = base + fixedhl;
442 const uint8_t *ip = op;
446 int remain = ipend - ip;
447 if (opt == 0x00) /* End of Options List */ break;
448 if (opt == 0x01) /* No Operation */ continue;
449 if (remain < 2) return "IPv4 options truncated at length";
451 if (remain < optlen) return "IPv4 options truncated in option";
452 if (opt & 0x80) /* copy */ {
453 memmove(op, ip, optlen);
458 while ((hl = (op - base)) & 0x3)
459 *op++ = 0x00 /* End of Option List */;
460 ((struct iphdr*)base)->ihl = hl >> 2;
466 /* Fragment or send ICMP Fragmentation Needed */
467 static void netlink_maybe_fragment(struct netlink *st,
468 netlink_deliver_fn *deliver,
470 const char *delivery_name,
472 uint32_t source, uint32_t dest,
473 struct buffer_if *buf)
475 struct iphdr *iph=(struct iphdr*)buf->start;
476 long hl = iph->ihl*4;
477 const char *ssource = ipaddr_to_string(source);
479 if (buf->size <= mtu) {
480 deliver(deliver_dst, buf);
484 MDEBUG("%s: fragmenting %s->%s org.size=%"PRId32"\n",
485 st->name, ssource, delivery_name, buf->size);
487 #define BADFRAG(m, ...) \
489 "%s: fragmenting packet from source %s" \
490 " for transmission via %s: " m "\n", \
491 st->name, ssource, delivery_name, \
494 unsigned orig_frag = ntohs(iph->frag);
496 if (orig_frag&IPHDR_FRAG_DONT) {
497 union icmpinfofield info =
498 { .fragneeded = { .unused = 0, .mtu = htons(mtu) } };
499 netlink_icmp_simple(st,buf,
500 ICMP_TYPE_UNREACHABLE,
501 ICMP_CODE_FRAGMENTATION_REQUIRED,
507 BADFRAG("mtu %"PRId32" too small", mtu);
512 /* we (ab)use the icmp buffer to stash the original packet */
513 struct buffer_if *orig = &st->icmp;
514 BUF_ALLOC(orig,"netlink_client_deliver fragment orig");
515 buffer_copy(orig,buf);
518 const uint8_t *startindata = orig->start + hl;
519 const uint8_t *indata = startindata;
520 const uint8_t *endindata = orig->start + orig->size;
524 /* compute our fragment offset */
525 long dataoffset = indata - startindata
526 + (orig_frag & IPHDR_FRAG_OFF)*8;
527 assert(!(dataoffset & 7));
528 if (dataoffset > IPHDR_FRAG_OFF*8) {
529 BADFRAG("ultimate fragment offset out of range");
533 BUF_ALLOC(buf,"netlink_client_deliver fragment frag");
534 buffer_init(buf,calculate_max_start_pad());
536 /* copy header (possibly filtered); will adjust in a bit */
537 struct iphdr *fragh = buf_append(buf, hl);
538 memcpy(fragh, orig->start, hl);
540 /* decide how much payload to copy and copy it */
541 long avail = mtu - hl;
542 long remain = endindata - indata;
543 long use = avail < remain ? (avail & ~(long)7) : remain;
544 memcpy(buf_append(buf, use), indata, use);
547 _Bool last_frag = indata >= endindata;
549 /* adjust the header */
550 fragh->tot_len = htons(buf->size);
552 htons((orig_frag & ~IPHDR_FRAG_OFF) |
553 (last_frag ? 0 : IPHDR_FRAG_MORE) |
556 fragh->check = ip_fast_csum((const void*)fragh, fragh->ihl);
558 /* actually send it */
559 deliver(deliver_dst, buf);
563 /* after copying the header for the first frag,
564 * we filter the header for the remaining frags */
566 const char *bad = fragment_filter_header(orig->start, &hl);
567 if (bad) { BADFRAG("%s", bad); break; }
576 /* Deliver a packet _to_ client; used after we have decided
577 * what to do with it (and just to check that the client has
578 * actually registered a delivery function with us). */
579 static void netlink_client_deliver(struct netlink *st,
580 struct netlink_client *client,
581 uint32_t source, uint32_t dest,
582 struct buffer_if *buf)
584 if (!client->deliver) {
586 s=ipaddr_to_string(source);
587 d=ipaddr_to_string(dest);
588 Message(M_ERR,"%s: dropping %s->%s, client not registered\n",
594 netlink_maybe_fragment(st, client->deliver,client->dst,client->name,
595 client->mtu, source,dest,buf);
599 /* Deliver a packet to the host; used after we have decided that that
600 * is what to do with it. */
601 static void netlink_host_deliver(struct netlink *st,
602 uint32_t source, uint32_t dest,
603 struct buffer_if *buf)
605 netlink_maybe_fragment(st, st->deliver_to_host,st->dst,"(host)",
606 st->mtu, source,dest,buf);
610 /* Deliver a packet. "client" is the _origin_ of the packet, not its
611 destination, and is NULL for packets from the host and packets
612 generated internally in secnet. */
613 static void netlink_packet_deliver(struct netlink *st,
614 struct netlink_client *client,
615 struct buffer_if *buf)
617 if (buf->size < (int)sizeof(struct iphdr)) {
618 Message(M_ERR,"%s: trying to deliver a too-short packet"
619 " from %s!\n",st->name, client?client->name:"(local)");
624 struct iphdr *iph=(struct iphdr *)buf->start;
625 uint32_t dest=ntohl(iph->daddr);
626 uint32_t source=ntohl(iph->saddr);
627 uint32_t best_quality;
628 bool_t allow_route=False;
629 bool_t found_allowed=False;
633 BUF_ASSERT_USED(buf);
635 if (dest==st->secnet_address) {
636 Message(M_ERR,"%s: trying to deliver a packet to myself!\n",st->name);
641 /* Packets from the host (client==NULL) may always be routed. Packets
642 from clients with the allow_route option will also be routed. */
643 if (!client || (client && (client->options & OPT_ALLOWROUTE)))
646 /* If !allow_route, we check the routing table anyway, and if
647 there's a suitable route with OPT_ALLOWROUTE set we use it. If
648 there's a suitable route, but none with OPT_ALLOWROUTE set then
649 we generate ICMP 'communication with destination network
650 administratively prohibited'. */
654 for (i=0; i<st->n_clients; i++) {
655 if (st->routes[i]->up &&
656 ipset_contains_addr(st->routes[i]->networks,dest)) {
657 /* It's an available route to the correct destination. But is
658 it better than the one we already have? */
660 /* If we have already found an allowed route then we don't
661 bother looking at routes we're not allowed to use. If
662 we don't yet have an allowed route we'll consider any. */
663 if (!allow_route && found_allowed) {
664 if (!(st->routes[i]->options&OPT_ALLOWROUTE)) continue;
667 if (st->routes[i]->link_quality>best_quality
668 || best_quality==0) {
669 best_quality=st->routes[i]->link_quality;
671 if (st->routes[i]->options&OPT_ALLOWROUTE)
673 /* If quality isn't perfect we may wish to
674 consider kicking the tunnel with a 0-length
675 packet to prompt it to perform a key setup.
676 Then it'll eventually decide it's up or
678 /* If quality is perfect and we're allowed to use the
679 route we don't need to search any more. */
680 if (best_quality>=MAXIMUM_LINK_QUALITY &&
681 (allow_route || found_allowed)) break;
685 if (best_match==-1) {
686 /* The packet's not going down a tunnel. It might (ought to)
688 if (ipset_contains_addr(st->networks,dest)) {
689 netlink_host_deliver(st,source,dest,buf);
690 BUF_ASSERT_FREE(buf);
693 s=ipaddr_to_string(source);
694 d=ipaddr_to_string(dest);
695 Message(M_DEBUG,"%s: don't know where to deliver packet "
696 "(s=%s, d=%s)\n", st->name, s, d);
698 netlink_icmp_simple(st,buf,ICMP_TYPE_UNREACHABLE,
699 ICMP_CODE_NET_UNREACHABLE, icmp_noinfo);
704 !(st->routes[best_match]->options&OPT_ALLOWROUTE)) {
706 s=ipaddr_to_string(source);
707 d=ipaddr_to_string(dest);
708 /* We have a usable route but aren't allowed to use it.
709 Generate ICMP destination unreachable: communication
710 with destination network administratively prohibited */
711 Message(M_NOTICE,"%s: denied forwarding for packet (s=%s, d=%s)\n",
715 netlink_icmp_simple(st,buf,ICMP_TYPE_UNREACHABLE,
716 ICMP_CODE_NET_PROHIBITED, icmp_noinfo);
719 if (best_quality>0) {
720 netlink_client_deliver(st,st->routes[best_match],
722 BUF_ASSERT_FREE(buf);
724 /* Generate ICMP destination unreachable */
725 netlink_icmp_simple(st,buf,
726 ICMP_TYPE_UNREACHABLE,
727 ICMP_CODE_NET_UNREACHABLE,
733 BUF_ASSERT_FREE(buf);
736 static void netlink_packet_forward(struct netlink *st,
737 struct netlink_client *client,
738 struct buffer_if *buf)
740 if (buf->size < (int)sizeof(struct iphdr)) return;
741 struct iphdr *iph=(struct iphdr *)buf->start;
743 BUF_ASSERT_USED(buf);
745 /* Packet has already been checked */
747 /* Generate ICMP time exceeded */
748 netlink_icmp_simple(st,buf,ICMP_TYPE_TIME_EXCEEDED,
749 ICMP_CODE_TTL_EXCEEDED,icmp_noinfo);
755 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
757 netlink_packet_deliver(st,client,buf);
758 BUF_ASSERT_FREE(buf);
761 /* Deal with packets addressed explicitly to us */
762 static void netlink_packet_local(struct netlink *st,
763 struct netlink_client *client,
764 struct buffer_if *buf)
770 if (buf->size < (int)sizeof(struct icmphdr)) {
771 Message(M_WARNING,"%s: short packet addressed to secnet; "
772 "ignoring it\n",st->name);
776 h=(struct icmphdr *)buf->start;
778 unsigned fraginfo = ntohs(h->iph.frag);
779 if ((fraginfo&(IPHDR_FRAG_OFF|IPHDR_FRAG_MORE))!=0) {
780 if (!(fraginfo & IPHDR_FRAG_OFF))
781 /* report only for first fragment */
782 Message(M_WARNING,"%s: fragmented packet addressed to secnet; "
783 "ignoring it\n",st->name);
788 if (h->iph.protocol==1) {
790 if (h->type==ICMP_TYPE_ECHO_REQUEST && h->code==0) {
791 /* ICMP echo-request. Special case: we re-use the buffer
792 to construct the reply. */
793 h->type=ICMP_TYPE_ECHO_REPLY;
794 h->iph.daddr=h->iph.saddr;
795 h->iph.saddr=htonl(st->secnet_address);
798 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
799 netlink_icmp_csum(h);
800 netlink_packet_deliver(st,NULL,buf);
803 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
805 /* Send ICMP protocol unreachable */
806 netlink_icmp_simple(st,buf,ICMP_TYPE_UNREACHABLE,
807 ICMP_CODE_PROTOCOL_UNREACHABLE,icmp_noinfo);
/* If client==NULL the packet is from the host; otherwise client
   specifies which tunnel it has come from. */
817 static void netlink_incoming(struct netlink *st, struct netlink_client *client,
818 struct buffer_if *buf)
820 uint32_t source,dest;
823 const char *sourcedesc=client?client->name:"host";
825 BUF_ASSERT_USED(buf);
827 if (!netlink_check(st,buf,errmsgbuf,sizeof(errmsgbuf))) {
828 Message(M_WARNING,"%s: bad IP packet from %s: %s\n",
834 assert(buf->size >= (int)sizeof(struct iphdr));
835 iph=(struct iphdr *)buf->start;
837 source=ntohl(iph->saddr);
838 dest=ntohl(iph->daddr);
840 /* Check source. If we don't like the source, there's no point
841 generating ICMP because we won't know how to get it to the
842 source of the packet. */
844 /* Check that the packet source is appropriate for the tunnel
846 if (!ipset_contains_addr(client->networks,source)) {
848 s=ipaddr_to_string(source);
849 d=ipaddr_to_string(dest);
850 Message(M_WARNING,"%s: packet from tunnel %s with bad "
851 "source address (s=%s,d=%s)\n",st->name,client->name,s,d);
857 /* Check that the packet originates in our configured local
858 network, and hasn't been forwarded from elsewhere or
859 generated with the wrong source address */
860 if (!ipset_contains_addr(st->networks,source)) {
862 s=ipaddr_to_string(source);
863 d=ipaddr_to_string(dest);
864 Message(M_WARNING,"%s: outgoing packet with bad source address "
865 "(s=%s,d=%s)\n",st->name,s,d);
872 /* If this is a point-to-point device we don't examine the
873 destination address at all; we blindly send it down our
874 one-and-only registered tunnel, or to the host, depending on
875 where it came from. It's up to external software to check
876 address validity and generate ICMP, etc. */
879 netlink_host_deliver(st,source,dest,buf);
881 netlink_client_deliver(st,st->clients,source,dest,buf);
883 BUF_ASSERT_FREE(buf);
887 /* st->secnet_address needs checking before matching destination
889 if (dest==st->secnet_address) {
890 netlink_packet_local(st,client,buf);
891 BUF_ASSERT_FREE(buf);
894 netlink_packet_forward(st,client,buf);
895 BUF_ASSERT_FREE(buf);
898 static void netlink_inst_incoming(void *sst, struct buffer_if *buf)
900 struct netlink_client *c=sst;
901 struct netlink *st=c->nst;
903 netlink_incoming(st,c,buf);
/* Packet delivery entry point for the device itself: sst is the
   struct netlink.  The packet is passed to netlink_incoming() with a
   NULL client, which marks it as not having come from any client. */
static void netlink_dev_incoming(void *sst, struct buffer_if *buf)
{
    struct netlink *nl=sst;

    netlink_incoming(nl,NULL,buf);
}
913 static void netlink_set_quality(void *sst, uint32_t quality)
915 struct netlink_client *c=sst;
916 struct netlink *st=c->nst;
918 c->link_quality=quality;
919 c->up=(c->link_quality==LINK_QUALITY_DOWN)?False:True;
920 if (c->options&OPT_SOFTROUTE) {
921 st->set_routes(st->dst,c);
925 static void netlink_output_subnets(struct netlink *st, uint32_t loglevel,
926 struct subnet_list *snets)
931 for (i=0; i<snets->entries; i++) {
932 net=subnet_to_string(snets->list[i]);
933 Message(loglevel,"%s ",net);
938 static void netlink_dump_routes(struct netlink *st, bool_t requested)
944 if (requested) c=M_WARNING;
946 net=ipaddr_to_string(st->secnet_address);
947 Message(c,"%s: point-to-point (remote end is %s); routes: ",
950 netlink_output_subnets(st,c,st->clients->subnets);
953 Message(c,"%s: routing table:\n",st->name);
954 for (i=0; i<st->n_clients; i++) {
955 netlink_output_subnets(st,c,st->routes[i]->subnets);
956 Message(c,"-> tunnel %s (%s,mtu %d,%s routes,%s,"
957 "quality %d,use %d,pri %lu)\n",
959 st->routes[i]->up?"up":"down",
961 st->routes[i]->options&OPT_SOFTROUTE?"soft":"hard",
962 st->routes[i]->options&OPT_ALLOWROUTE?"free":"restricted",
963 st->routes[i]->link_quality,
964 st->routes[i]->outcount,
965 (unsigned long)st->routes[i]->priority);
967 net=ipaddr_to_string(st->secnet_address);
968 Message(c,"%s/32 -> netlink \"%s\" (use %d)\n",
969 net,st->name,st->localcount);
971 for (i=0; i<st->subnets->entries; i++) {
972 net=subnet_to_string(st->subnets->list[i]);
973 Message(c,"%s ",net);
977 Message(c,"-> host (use %d)\n",st->outcount);
981 /* ap is a pointer to a member of the routes array */
982 static int netlink_compare_client_priority(const void *ap, const void *bp)
984 const struct netlink_client *const*a=ap;
985 const struct netlink_client *const*b=bp;
987 if ((*a)->priority==(*b)->priority) return 0;
988 if ((*a)->priority<(*b)->priority) return 1;
992 static void netlink_phase_hook(void *sst, uint32_t new_phase)
994 struct netlink *st=sst;
995 struct netlink_client *c;
998 /* All the networks serviced by the various tunnels should now
999 * have been registered. We build a routing table by sorting the
1000 * clients by priority. */
1001 st->routes=safe_malloc_ary(sizeof(*st->routes),st->n_clients,
1002 "netlink_phase_hook");
1003 /* Fill the table */
1005 for (c=st->clients; c; c=c->next) {
1009 /* Sort the table in descending order of priority */
1010 qsort(st->routes,st->n_clients,sizeof(*st->routes),
1011 netlink_compare_client_priority);
1013 netlink_dump_routes(st,False);
1016 static void netlink_signal_handler(void *sst, int signum)
1018 struct netlink *st=sst;
1019 Message(M_INFO,"%s: route dump requested by SIGUSR1\n",st->name);
1020 netlink_dump_routes(st,True);
1023 static void netlink_inst_set_mtu(void *sst, int32_t new_mtu)
1025 struct netlink_client *c=sst;
1030 static void netlink_inst_reg(void *sst, netlink_deliver_fn *deliver,
1031 void *dst, uint32_t *localmtu_r)
1033 struct netlink_client *c=sst;
1034 struct netlink *st=c->nst;
1040 *localmtu_r=st->mtu;
1043 static struct flagstr netlink_option_table[]={
1044 { "soft", OPT_SOFTROUTE },
1045 { "allow-route", OPT_ALLOWROUTE },
1048 /* This is the routine that gets called when the closure that's
1049 returned by an invocation of a netlink device closure (eg. tun,
1050 userv-ipif) is invoked. It's used to create routes and pass in
1051 information about them; the closure it returns is used by site
1053 static closure_t *netlink_inst_create(struct netlink *st,
1054 struct cloc loc, dict_t *dict)
1056 struct netlink_client *c;
1058 struct ipset *networks;
1059 uint32_t options,priority;
1063 name=dict_read_string(dict, "name", True, st->name, loc);
1065 l=dict_lookup(dict,"routes");
1067 cfgfatal(loc,st->name,"required parameter \"routes\" not found\n");
1068 networks=string_list_to_ipset(l,loc,st->name,"routes");
1069 options=string_list_to_word(dict_lookup(dict,"options"),
1070 netlink_option_table,st->name);
1072 priority=dict_read_number(dict,"priority",False,st->name,loc,0);
1073 mtu=dict_read_number(dict,"mtu",False,st->name,loc,0);
1075 if ((options&OPT_SOFTROUTE) && !st->set_routes) {
1076 cfgfatal(loc,st->name,"this netlink device does not support "
1081 if (options&OPT_SOFTROUTE) {
1082 /* XXX for now we assume that soft routes require root privilege;
1083 this may not always be true. The device driver can tell us. */
1084 require_root_privileges=True;
1085 require_root_privileges_explanation="netlink: soft routes";
1087 cfgfatal(loc,st->name,"point-to-point netlinks do not support "
1093 /* Check that nets are a subset of st->remote_networks;
1094 refuse to register if they are not. */
1095 if (!ipset_is_subset(st->remote_networks,networks)) {
1096 cfgfatal(loc,st->name,"routes are not allowed\n");
1100 c=safe_malloc(sizeof(*c),"netlink_inst_create");
1101 c->cl.description=name;
1102 c->cl.type=CL_NETLINK;
1104 c->cl.interface=&c->ops;
1106 c->ops.reg=netlink_inst_reg;
1107 c->ops.deliver=netlink_inst_incoming;
1108 c->ops.set_quality=netlink_set_quality;
1109 c->ops.set_mtu=netlink_inst_set_mtu;
1112 c->networks=networks;
1113 c->subnets=ipset_to_subnet_list(networks);
1114 c->priority=priority;
1118 c->link_quality=LINK_QUALITY_UNUSED;
1119 c->mtu=mtu?mtu:st->mtu;
1124 c->next=st->clients;
1126 assert(st->n_clients < INT_MAX);
1132 static list_t *netlink_inst_apply(closure_t *self, struct cloc loc,
1133 dict_t *context, list_t *args)
1135 struct netlink *st=self->interface;
1141 item=list_elem(args,0);
1142 if (!item || item->type!=t_dict) {
1143 cfgfatal(loc,st->name,"must have a dictionary argument\n");
1145 dict=item->data.dict;
1147 cl=netlink_inst_create(st,loc,dict);
1149 return new_closure(cl);
1152 netlink_deliver_fn *netlink_init(struct netlink *st,
1153 void *dst, struct cloc loc,
1154 dict_t *dict, cstring_t description,
1155 netlink_route_fn *set_routes,
1156 netlink_deliver_fn *to_host)
1162 st->cl.description=description;
1163 st->cl.type=CL_PURE;
1164 st->cl.apply=netlink_inst_apply;
1165 st->cl.interface=st;
1169 st->set_routes=set_routes;
1170 st->deliver_to_host=to_host;
1172 st->name=dict_read_string(dict,"name",False,description,loc);
1173 if (!st->name) st->name=description;
1174 l=dict_lookup(dict,"networks");
1176 st->networks=string_list_to_ipset(l,loc,st->name,"networks");
1178 struct ipset *empty;
1180 st->networks=ipset_complement(empty);
1183 l=dict_lookup(dict,"remote-networks");
1185 st->remote_networks=string_list_to_ipset(l,loc,st->name,
1188 struct ipset *empty;
1190 st->remote_networks=ipset_complement(empty);
1194 sa=dict_find_item(dict,"secnet-address",False,"netlink",loc);
1195 ptpa=dict_find_item(dict,"ptp-address",False,"netlink",loc);
1197 cfgfatal(loc,st->name,"you may not specify secnet-address and "
1198 "ptp-address in the same netlink device\n");
1200 if (!(sa || ptpa)) {
1201 cfgfatal(loc,st->name,"you must specify secnet-address or "
1202 "ptp-address for this netlink device\n");
1205 st->secnet_address=string_item_to_ipaddr(sa,"netlink");
1208 st->secnet_address=string_item_to_ipaddr(ptpa,"netlink");
1211 /* To be strictly correct we could subtract secnet_address from
1212 networks here. It shouldn't make any practical difference,
1213 though, and will make the route dump look complicated... */
1214 st->subnets=ipset_to_subnet_list(st->networks);
1215 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
1216 buffer_new(&st->icmp,MAX(ICMP_BUFSIZE,st->mtu));
1220 add_hook(PHASE_SETUP,netlink_phase_hook,st);
1221 request_signal_notification(SIGUSR1, netlink_signal_handler, st);
1223 /* If we're point-to-point then we return a CL_NETLINK directly,
1224 rather than a CL_NETLINK_OLD or pure closure (depending on
1225 compatibility). This CL_NETLINK is for our one and only
1226 client. Our cl.apply function is NULL. */
1229 cl=netlink_inst_create(st,loc,dict);
1232 return netlink_dev_incoming;
1235 /* No connection to the kernel at all... */
1241 static bool_t null_set_route(void *sst, struct netlink_client *routes)
1243 struct null *st=sst;
1245 if (routes->up!=routes->kup) {
1246 Message(M_INFO,"%s: setting routes for tunnel %s to state %s\n",
1247 st->nl.name,routes->name,
1248 routes->up?"up":"down");
1249 routes->kup=routes->up;
1255 static void null_deliver(void *sst, struct buffer_if *buf)
1260 static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
1267 st=safe_malloc(sizeof(*st),"null_apply");
1269 item=list_elem(args,0);
1270 if (!item || item->type!=t_dict)
1271 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
1273 dict=item->data.dict;
1275 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_set_route,
1278 return new_closure(&st->nl.cl);
1281 void netlink_module(dict_t *dict)
1283 add_closure(dict,"null-netlink",null_apply);