1 /* User-kernel network link */
4 * This file is part of secnet.
5 * See README for full list of copyright holders.
7 * secnet is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3 of the License, or
10 * (at your option) any later version.
12 * secnet is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * version 3 along with secnet; if not, see
19 * https://www.gnu.org/licenses/gpl.html.
22 /* See RFCs 791, 792, 1123 and 1812 */
24 /* The netlink device is actually a router. Tunnels are unnumbered
25 point-to-point lines (RFC1812 section 2.2.7); the router has a
26 single address (the 'router-id'). */
28 /* This is where we currently have the anti-spoofing paranoia - before
29 sending a packet to the kernel we check that the tunnel it came
30 over could reasonably have produced it. */
33 /* Points to note from RFC1812 (which may require changes in this
36 3.3.4 Maximum Transmission Unit - MTU
38 The MTU of each logical interface MUST be configurable within the
39 range of legal MTUs for the interface.
41 Many Link Layer protocols define a maximum frame size that may be
42 sent. In such cases, a router MUST NOT allow an MTU to be set which
43 would allow sending of frames larger than those allowed by the Link
44 Layer protocol. However, a router SHOULD be willing to receive a
45 packet as large as the maximum frame size even if that is larger than
48 4.2.1 A router SHOULD count datagrams discarded.
50 4.2.2.1 Source route options - we probably should implement processing
51 of source routes, even though mostly the security policy will prevent
54 5.3.13.4 Source Route Options
56 A router MUST implement support for source route options in forwarded
57 packets. A router MAY implement a configuration option that, when
58 enabled, causes all source-routed packets to be discarded. However,
59 such an option MUST NOT be enabled by default.
61 5.3.13.5 Record Route Option
63 Routers MUST support the Record Route option in forwarded packets.
65 A router MAY provide a configuration option that, if enabled, will
66 cause the router to ignore (i.e., pass through unchanged) Record
67 Route options in forwarded packets. If provided, such an option MUST
68 default to enabling the record-route. This option should not affect
69 the processing of Record Route options in datagrams received by the
70 router itself (in particular, Record Route options in ICMP echo
71 requests will still be processed according to Section [4.3.3.6]).
73 5.3.13.6 Timestamp Option
75 Routers MUST support the timestamp option in forwarded packets. A
76 timestamp value MUST follow the rules given [INTRO:2].
78 If the flags field = 3 (timestamp and prespecified address), the
79 router MUST add its timestamp if the next prespecified address
80 matches any of the router's IP addresses. It is not necessary that
81 the prespecified address be either the address of the interface on
82 which the packet arrived or the address of the interface over which
86 4.2.2.7 Fragmentation: RFC 791 Section 3.2
88 Fragmentation, as described in [INTERNET:1], MUST be supported by a
91 4.2.2.8 Reassembly: RFC 791 Section 3.2
93 As specified in the corresponding section of [INTRO:2], a router MUST
94 support reassembly of datagrams that it delivers to itself.
96 4.2.2.9 Time to Live: RFC 791 Section 3.2
98 Note in particular that a router MUST NOT check the TTL of a packet
99 except when forwarding it.
101 A router MUST NOT discard a datagram just because it was received
102 with TTL equal to zero or one; if it is to the router and otherwise
103 valid, the router MUST attempt to receive it.
105 On messages the router originates, the IP layer MUST provide a means
106 for the transport layer to set the TTL field of every datagram that
107 is sent. When a fixed TTL value is used, it MUST be configurable.
110 8.1 The Simple Network Management Protocol - SNMP
111 8.1.1 SNMP Protocol Elements
113 Routers MUST be manageable by SNMP [MGT:3]. The SNMP MUST operate
114 using UDP/IP as its transport and network protocols.
129 #define MDEBUG(...) Message(M_DEBUG, __VA_ARGS__)
130 #else /* !NETLINK_DEBUG */
131 #define MDEBUG(...) ((void)0)
132 #endif /* !NETLINK_DEBUG */
134 #define ICMP_TYPE_ECHO_REPLY 0
136 #define ICMP_TYPE_UNREACHABLE 3
137 #define ICMP_CODE_NET_UNREACHABLE 0
138 #define ICMP_CODE_PROTOCOL_UNREACHABLE 2
139 #define ICMP_CODE_FRAGMENTATION_REQUIRED 4
140 #define ICMP_CODE_NET_PROHIBITED 13
142 #define ICMP_TYPE_ECHO_REQUEST 8
144 #define ICMP_TYPE_TIME_EXCEEDED 11
145 #define ICMP_CODE_TTL_EXCEEDED 0
147 /* Generic IP checksum routine.
 *
 * iph points at the data to be summed.  count is presumably its length in
 * bytes (the plain C ip_fast_csum passes ihl*4) -- TODO confirm.
 * The steps are: add 16-bit words (converted with ntohs) into a 32-bit
 * accumulator, add a single trailing byte, and fold the bits above
 * bit 15 back into the low 16 bits. */
148 static inline uint16_t ip_csum(const uint8_t *iph,int32_t count)
150 register uint32_t sum=0;
/* 16-bit read, byte-swapped to host order before accumulation. */
153 sum+=ntohs(*(uint16_t *)iph);
/* 8-bit read, as opposed to the 16-bit read above. */
158 sum+=*(uint8_t *)iph;
/* Fold the carries that accumulated above bit 15 back into the low half. */
160 sum=(sum&0xffff)+(sum>>16);
166 * This is a version of ip_compute_csum() optimized for IP headers,
167 * which always checksum on 4 octet boundaries.
169 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
/* Inline-assembly implementation of the IP header checksum.
 * iph points at the header and ihl is its length.  The plain C variant of
 * this function multiplies ihl by 4 to get a byte count, so presumably ihl
 * is in 4-octet words here too -- TODO confirm.
 * The asm uses add-with-carry (adcl) instructions addressed relative to
 * operand 1 (iph) and accumulating into operand 0 (sum). */
172 static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl) {
175 __asm__ __volatile__(
181 "adcl 12(%1), %0 ;\n"
182 "1: adcl 16(%1), %0 ;\n"
193 /* Since the input registers which are loaded with iph and ihl
194 are modified, we must also specify them as outputs, or gcc
195 will assume they contain their original values. */
196 : "=r" (sum), "=r" (iph), "=r" (ihl)
/* "1" and "2" are matching constraints: each input shares its register
 * with output operand 1 (iph) and output operand 2 (ihl) respectively. */
197 : "1" (iph), "2" (ihl)
/* Plain C implementation of ip_fast_csum.
 * ihl is multiplied by 4 to form the count handed to ip_csum(), and the
 * result of ip_csum() is returned unchanged.  The assert rejects values of
 * ihl for which ihl*4 would not fit in an int. */
202 static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl)
204 assert(ihl < INT_MAX/4);
205 return ip_csum(iph,ihl*4);
210 #if defined (WORDS_BIGENDIAN)
221 #define IPHDR_FRAG_OFF ((uint16_t)0x1fff)
222 #define IPHDR_FRAG_MORE ((uint16_t)0x2000)
223 #define IPHDR_FRAG_DONT ((uint16_t)0x4000)
224 /* reserved 0x8000 */
230 /* The options start here. */
238 union icmpinfofield {
257 static const union icmpinfofield icmp_noinfo;
259 static void netlink_client_deliver(struct netlink *st,
260 struct netlink_client *client,
261 uint32_t source, uint32_t dest,
262 struct buffer_if *buf);
263 static void netlink_host_deliver(struct netlink *st,
264 struct netlink_client *sender,
265 uint32_t source, uint32_t dest,
266 struct buffer_if *buf);
/* Name used for a packet sender in log and debug messages: the name stored
 * on the client, or the fixed string "(local)" when sender is NULL. */
268 static const char *sender_name(struct netlink_client *sender /* or NULL */)
270 return sender?sender->name:"(local)";
273 static void netlink_packet_deliver(struct netlink *st,
274 struct netlink_client *client,
275 struct buffer_if *buf);
277 /* XXX RFC1812 4.3.2.5:
278 All other ICMP error messages (Destination Unreachable,
279 Redirect, Time Exceeded, and Parameter Problem) SHOULD have their
280 precedence value set to 6 (INTERNETWORK CONTROL) or 7 (NETWORK
281 CONTROL). The IP Precedence value for these error messages MAY be
284 static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
285 uint32_t source, uint32_t dest,
290 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
291 buffer_init(&st->icmp,calculate_max_start_pad());
292 h=buf_append(&st->icmp,sizeof(*h));
297 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
300 h->iph.ttl=255; /* XXX should be configurable */
302 h->iph.saddr=htonl(source);
303 h->iph.daddr=htonl(dest);
305 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
312 /* Fill in the ICMP checksum field correctly.
 *
 * h points at a combined IP+ICMP header.  The length that is summed is the
 * IP total length minus the IP header length (ihl is multiplied by 4 here),
 * and the sum starts at the ICMP type field, so it covers the ICMP header
 * and whatever follows it within the packet. */
313 static void netlink_icmp_csum(struct icmphdr *h)
317 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
319 h->check=ip_csum(&h->type,len);
323 * An ICMP error message MUST NOT be sent as the result of
326 * * an ICMP error message, or
328 * * a datagram destined to an IP broadcast or IP multicast
331 * * a datagram sent as a link-layer broadcast, or
333 * * a non-initial fragment, or
335 * * a datagram whose source address does not define a single
336 * host -- e.g., a zero address, a loopback address, a
337 * broadcast address, a multicast address, or a Class E
/* Decide whether it is acceptable to send an ICMP error in response to the
 * packet in buf.  Returns False when the buffer is shorter than a
 * struct icmphdr, when the packet is a non-initial fragment, or when its
 * source address is zero, in 127.0.0.0/8, multicast (224.0.0.0/4) or
 * Class E (240.0.0.0/4).  For ICMP packets the decision also depends on
 * the ICMP type, via the switch on icmph->type. */
340 static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
343 struct icmphdr *icmph;
346 if (buf->size < (int)sizeof(struct icmphdr)) return False;
/* Two views of the same bytes: iph and icmph both point at the start of
 * the buffer. */
347 iph=(struct iphdr *)buf->start;
348 icmph=(struct icmphdr *)buf->start;
/* IP protocol number 1 is ICMP. */
349 if (iph->protocol==1) {
350 switch(icmph->type) {
351 /* Based on http://www.iana.org/assignments/icmp-parameters/icmp-parameters.xhtml#icmp-parameters-types
352 * as retrieved Thu, 20 Mar 2014 00:16:44 +0000.
353 * Deprecated, reserved, unassigned and experimental
354 * options are treated as not safe to reply to.
356 case 0: /* Echo Reply */
358 case 13: /* Timestamp */
359 case 14: /* Timestamp Reply */
365 /* How do we spot broadcast destination addresses? */
/* A non-zero fragment offset means this is not the first fragment. */
366 if (ntohs(iph->frag)&IPHDR_FRAG_OFF) return False;
367 source=ntohl(iph->saddr);
368 if (source==0) return False;
/* 127.0.0.0/8 */
369 if ((source&0xff000000)==0x7f000000) return False;
370 /* How do we spot broadcast source addresses? */
371 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
372 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
376 /* How much of the original IP packet do we include in its ICMP
377 response? The header plus up to 64 bits. */
380 4.3.2.3 Original Message Header
382 Historically, every ICMP error message has included the Internet
383 header and at least the first 8 data bytes of the datagram that
384 triggered the error. This is no longer adequate, due to the use of
385 IP-in-IP tunneling and other technologies. Therefore, the ICMP
386 datagram SHOULD contain as much of the original datagram as possible
387 without the length of the ICMP datagram exceeding 576 bytes. The
388 returned IP header (and user data) MUST be identical to that which
389 was received, except that the router is not required to undo any
390 modifications to the IP header that are normally performed in
391 forwarding that were performed before the error was detected (e.g.,
392 decrementing the TTL, or updating options). Note that the
393 requirements of Section [4.3.3.5] supersede this requirement in some
394 cases (i.e., for a Parameter Problem message, if the problem is in a
395 modified field, the router must undo the modification). See Section
/* Number of bytes of the original packet to quote in an ICMP error.
 * Returns 0 if buf is too short to hold an IP header.  Otherwise the
 * result is the smaller of hlen and the total length recorded in the IP
 * header (plen), so the quote never extends past the packet itself. */
398 static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
400 if (buf->size < (int)sizeof(struct iphdr)) return 0;
401 struct iphdr *iph=(struct iphdr *)buf->start;
405 /* We include the first 8 bytes of the packet data, provided they exist */
407 plen=ntohs(iph->tot_len);
408 return MIN(hlen,plen);
411 /* origsender indicates where the packet we're constructing a response to
412 comes from. NULL indicates the host. */
/* Build and send an ICMP message with the given type and code, with info
 * stored as the type-specific header field, in response to the packet in
 * buf.  Nothing is generated unless netlink_icmp_may_reply(buf) allows it.
 * The message is built in st->icmp and addressed to the source address of
 * the offending packet.  st->icmp is asserted free afterwards. */
413 static void netlink_icmp_simple(struct netlink *st,
414 struct netlink_client *origsender,
415 struct buffer_if *buf,
416 uint8_t type, uint8_t code,
417 union icmpinfofield info)
422 if (netlink_icmp_may_reply(buf)) {
423 struct iphdr *iph=(struct iphdr *)buf->start;
/* The ICMP message goes back to whoever sent the offending packet. */
425 uint32_t icmpdest = ntohl(iph->saddr);
427 const char *icmpsourcedebugprefix;
429 icmpsource=st->secnet_address;
430 icmpsourcedebugprefix="";
431 } else if (origsender) {
432 /* was from peer, send reply as if from host */
433 icmpsource=st->local_address;
434 icmpsourcedebugprefix="L!";
436 /* was from host, send reply as if from peer */
437 icmpsource=st->secnet_address; /* actually, peer address */
438 icmpsourcedebugprefix="P!";
440 MDEBUG("%s: generating ICMP re %s[%s]->[%s]:"
441 " from %s%s type=%u code=%u\n",
442 st->name, sender_name(origsender),
443 ipaddr_to_string(ntohl(iph->saddr)),
444 ipaddr_to_string(ntohl(iph->daddr)),
445 icmpsourcedebugprefix,
446 ipaddr_to_string(icmpsource),
/* Build the header template, then append the leading len bytes of the
 * offending packet after it. */
449 len=netlink_icmp_reply_len(buf);
450 h=netlink_icmp_tmpl(st,icmpsource,icmpdest,len);
451 h->type=type; h->code=code; h->d=info;
452 BUF_ADD_BYTES(append,&st->icmp,buf->start,len);
/* The ICMP checksum is computed last, once the message is complete. */
453 netlink_icmp_csum(h);
/* Three delivery paths: general routing, straight back to the originating
 * client, or to the host. */
456 netlink_packet_deliver(st,NULL,&st->icmp);
457 } else if (origsender) {
458 netlink_client_deliver(st,origsender,icmpsource,icmpdest,&st->icmp);
460 netlink_host_deliver(st,NULL,icmpsource,icmpdest,&st->icmp);
462 BUF_ASSERT_FREE(&st->icmp);
467 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
469 * RFC1812: 4.2.2.5 MUST discard messages containing invalid checksums.
471 * Is the datagram acceptable?
473 * 1. Length at least the size of an ip header
475 * 3. Checksums correctly.
476 * 4. Doesn't have a bogus length
/* Validate the IP packet in buf.  On a failed check the BAD() macro formats
 * a short reason into errmsgbuf (bounded by errmsgbuflen).
 * Checks, in order: buffer holds at least a struct iphdr; version is 4;
 * ihl is at least 5; buffer holds at least ihl*4 bytes; the header checksum
 * verifies (ip_fast_csum returns 0); the buffer size equals the total
 * length in the header; the total length is at least ihl*4.
 * NOTE(review): the first message prints buf->size with PRIu32 while every
 * other message prints it with PRId32 -- confirm the type of buf->size and
 * make the format specifiers agree. */
478 static bool_t netlink_check(struct netlink *st, struct buffer_if *buf,
479 char *errmsgbuf, int errmsgbuflen)
481 #define BAD(...) do{ \
482 snprintf(errmsgbuf,errmsgbuflen,__VA_ARGS__); \
486 if (buf->size < (int)sizeof(struct iphdr)) BAD("len %"PRIu32"",buf->size);
487 struct iphdr *iph=(struct iphdr *)buf->start;
490 if (iph->version != 4) BAD("version %u",iph->version);
/* ihl is scaled by 4 in the size checks below, so 5 corresponds to 20 bytes. */
491 if (iph->ihl < 5) BAD("ihl %u",iph->ihl);
492 if (buf->size < iph->ihl*4) BAD("size %"PRId32"<%u*4",buf->size,iph->ihl);
/* Summing a header that includes a correct checksum field yields zero. */
493 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) BAD("csum");
494 len=ntohs(iph->tot_len);
495 /* There should be no padding */
496 if (buf->size!=len) BAD("len %"PRId32"!=%"PRId32,buf->size,len);
497 if (len<(iph->ihl<<2)) BAD("len %"PRId32"<(%u<<2)",len,iph->ihl);
498 /* XXX check that there's no source route specified */
/* Rewrite, in place, the IPv4 options of the header starting at base.
 * Options are scanned from just after the fixed header up to ipend.
 * Options with the 0x80 (copy) bit set are moved down to the write
 * position op.  The header is then padded with End of Option List bytes to
 * a multiple of 4 and the ihl field is updated to match.
 * Returns a static error string if an option is truncated.  The caller
 * treats a non-NULL return as failure.
 * hlp presumably carries the header length in and out -- TODO confirm. */
504 static const char *fragment_filter_header(uint8_t *base, long *hlp)
506 const int fixedhl = sizeof(struct iphdr);
508 const uint8_t *ipend = base + hl;
/* op is the write cursor and ip the read cursor; both start at the first
 * option byte. */
509 uint8_t *op = base + fixedhl;
510 const uint8_t *ip = op;
514 int remain = ipend - ip;
515 if (opt == 0x00) /* End of Options List */ break;
516 if (opt == 0x01) /* No Operation */ continue;
/* Any other option needs at least two bytes before its length is usable. */
517 if (remain < 2) return "IPv4 options truncated at length";
519 if (remain < optlen) return "IPv4 options truncated in option";
520 if (opt & 0x80) /* copy */ {
/* memmove because source and destination lie in the same header. */
521 memmove(op, ip, optlen);
/* Pad until the header length is a multiple of 4, then store it in words. */
526 while ((hl = (op - base)) & 0x3)
527 *op++ = 0x00 /* End of Option List */;
528 ((struct iphdr*)base)->ihl = hl >> 2;
534 /* Fragment or send ICMP Fragmentation Needed.
 *
 * A packet that fits within mtu is passed straight to deliver().
 * Otherwise, if IPHDR_FRAG_DONT is set in the packet, an ICMP
 * fragmentation-required error carrying mtu is generated through
 * netlink_icmp_simple().  Failing that, the packet is copied into st->icmp
 * and re-emitted through deliver() as fragments built in buf.
 * Fragment offsets are counted in units of 8 bytes, so a fragment that does
 * not take all the remaining payload has its payload rounded down to a
 * multiple of 8.  After the first fragment the header options are filtered
 * with fragment_filter_header().  Problems are reported with BADFRAG(). */
535 static void netlink_maybe_fragment(struct netlink *st,
536 struct netlink_client *sender,
537 netlink_deliver_fn *deliver,
539 const char *delivery_name,
541 uint32_t source, uint32_t dest,
542 struct buffer_if *buf)
544 struct iphdr *iph=(struct iphdr*)buf->start;
545 long hl = iph->ihl*4;
546 const char *ssource = ipaddr_to_string(source);
/* Fast path: no fragmentation needed. */
548 if (buf->size <= mtu) {
549 deliver(deliver_dst, buf);
553 MDEBUG("%s: fragmenting %s->%s org.size=%"PRId32"\n",
554 st->name, ssource, delivery_name, buf->size);
556 #define BADFRAG(m, ...) \
558 "%s: fragmenting packet from source %s" \
559 " for transmission via %s: " m "\n", \
560 st->name, ssource, delivery_name, \
563 unsigned orig_frag = ntohs(iph->frag);
565 if (orig_frag&IPHDR_FRAG_DONT) {
566 union icmpinfofield info =
567 { .fragneeded = { .unused = 0, .mtu = htons(mtu) } };
568 netlink_icmp_simple(st,sender,buf,
569 ICMP_TYPE_UNREACHABLE,
570 ICMP_CODE_FRAGMENTATION_REQUIRED,
576 BADFRAG("mtu %"PRId32" too small", mtu);
581 /* we (ab)use the icmp buffer to stash the original packet */
582 struct buffer_if *orig = &st->icmp;
583 BUF_ALLOC(orig,"netlink_client_deliver fragment orig");
584 buffer_copy(orig,buf);
/* Payload cursors into the stashed copy: start, current position, end. */
587 const uint8_t *startindata = orig->start + hl;
588 const uint8_t *indata = startindata;
589 const uint8_t *endindata = orig->start + orig->size;
593 /* compute our fragment offset */
594 long dataoffset = indata - startindata
595 + (orig_frag & IPHDR_FRAG_OFF)*8;
596 assert(!(dataoffset & 7));
/* The offset field can hold at most IPHDR_FRAG_OFF units of 8 bytes. */
597 if (dataoffset > IPHDR_FRAG_OFF*8) {
598 BADFRAG("ultimate fragment offset out of range");
602 BUF_ALLOC(buf,"netlink_client_deliver fragment frag");
603 buffer_init(buf,calculate_max_start_pad());
605 /* copy header (possibly filtered); will adjust in a bit */
606 struct iphdr *fragh = buf_append(buf, hl);
607 memcpy(fragh, orig->start, hl);
609 /* decide how much payload to copy and copy it */
610 long avail = mtu - hl;
611 long remain = endindata - indata;
/* If the remainder does not fit, round the payload down to a multiple of 8. */
612 long use = avail < remain ? (avail & ~(long)7) : remain;
613 BUF_ADD_BYTES(append, buf, indata, use);
616 _Bool last_frag = indata >= endindata;
618 /* adjust the header */
619 fragh->tot_len = htons(buf->size);
621 htons((orig_frag & ~IPHDR_FRAG_OFF) |
622 (last_frag ? 0 : IPHDR_FRAG_MORE) |
625 fragh->check = ip_fast_csum((const void*)fragh, fragh->ihl);
627 /* actually send it */
628 deliver(deliver_dst, buf);
632 /* after copying the header for the first frag,
633 * we filter the header for the remaining frags */
635 const char *bad = fragment_filter_header(orig->start, &hl);
636 if (bad) { BADFRAG("%s", bad); break; }
645 /* Deliver a packet _to_ client; used after we have decided
646 * what to do with it (and just to check that the client has
647 * actually registered a delivery function with us).
 *
 * If the client has no deliver function the packet is reported with an
 * M_ERR message.  Otherwise it goes through netlink_maybe_fragment() using
 * the deliver function, dst, name and mtu stored on the client.
 * No sender is passed to netlink_maybe_fragment() (the argument is NULL).
 * source and dest are host-order addresses used for logging and passed on. */
648 static void netlink_client_deliver(struct netlink *st,
649 struct netlink_client *client,
650 uint32_t source, uint32_t dest,
651 struct buffer_if *buf)
653 if (!client->deliver) {
655 s=ipaddr_to_string(source);
656 d=ipaddr_to_string(dest);
657 Message(M_ERR,"%s: dropping %s->%s, client not registered\n",
662 netlink_maybe_fragment(st,NULL, client->deliver,client->dst,client->name,
663 client->mtu, source,dest,buf);
667 /* Deliver a packet to the host; used after we have decided that that
668 * is what to do with it.
 *
 * Thin wrapper over netlink_maybe_fragment(): the delivery function is
 * st->deliver_to_host with st->dst, the delivery name is the fixed string
 * "(host)", and the MTU limit is st->mtu.  sender is passed through. */
669 static void netlink_host_deliver(struct netlink *st,
670 struct netlink_client *sender,
671 uint32_t source, uint32_t dest,
672 struct buffer_if *buf)
674 netlink_maybe_fragment(st,sender, st->deliver_to_host,st->dst,"(host)",
675 st->mtu, source,dest,buf);
679 /* Deliver a packet. "sender"==NULL for packets from the host and packets
680 generated internally in secnet. */
681 static void netlink_packet_deliver(struct netlink *st,
682 struct netlink_client *sender,
683 struct buffer_if *buf)
685 if (buf->size < (int)sizeof(struct iphdr)) {
686 Message(M_ERR,"%s: trying to deliver a too-short packet"
687 " from %s!\n",st->name, sender_name(sender));
692 struct iphdr *iph=(struct iphdr *)buf->start;
693 uint32_t dest=ntohl(iph->daddr);
694 uint32_t source=ntohl(iph->saddr);
695 uint32_t best_quality;
696 bool_t allow_route=False;
697 bool_t found_allowed=False;
701 BUF_ASSERT_USED(buf);
703 if (dest==st->secnet_address) {
704 Message(M_ERR,"%s: trying to deliver a packet to myself!\n",st->name);
709 /* Packets from the host (sender==NULL) may always be routed. Packets
710 from clients with the allow_route option will also be routed. */
711 if (!sender || (sender && (sender->options & OPT_ALLOWROUTE)))
714 /* If !allow_route, we check the routing table anyway, and if
715 there's a suitable route with OPT_ALLOWROUTE set we use it. If
716 there's a suitable route, but none with OPT_ALLOWROUTE set then
717 we generate ICMP 'communication with destination network
718 administratively prohibited'. */
722 for (i=0; i<st->n_clients; i++) {
723 if (st->routes[i]->up &&
724 ipset_contains_addr(st->routes[i]->networks,dest)) {
725 /* It's an available route to the correct destination. But is
726 it better than the one we already have? */
728 /* If we have already found an allowed route then we don't
729 bother looking at routes we're not allowed to use. If
730 we don't yet have an allowed route we'll consider any. */
731 if (!allow_route && found_allowed) {
732 if (!(st->routes[i]->options&OPT_ALLOWROUTE)) continue;
735 if (st->routes[i]->link_quality>best_quality
736 || best_quality==0) {
737 best_quality=st->routes[i]->link_quality;
739 if (st->routes[i]->options&OPT_ALLOWROUTE)
741 /* If quality isn't perfect we may wish to
742 consider kicking the tunnel with a 0-length
743 packet to prompt it to perform a key setup.
744 Then it'll eventually decide it's up or
746 /* If quality is perfect and we're allowed to use the
747 route we don't need to search any more. */
748 if (best_quality>=MAXIMUM_LINK_QUALITY &&
749 (allow_route || found_allowed)) break;
753 if (best_match==-1) {
754 /* The packet's not going down a tunnel. It might (ought to)
756 if (ipset_contains_addr(st->networks,dest)) {
757 netlink_host_deliver(st,sender,source,dest,buf);
758 BUF_ASSERT_FREE(buf);
761 s=ipaddr_to_string(source);
762 d=ipaddr_to_string(dest);
763 Message(M_DEBUG,"%s: don't know where to deliver packet "
764 "(s=%s, d=%s)\n", st->name, s, d);
765 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE,
766 ICMP_CODE_NET_UNREACHABLE, icmp_noinfo);
771 !(st->routes[best_match]->options&OPT_ALLOWROUTE)) {
773 s=ipaddr_to_string(source);
774 d=ipaddr_to_string(dest);
775 /* We have a usable route but aren't allowed to use it.
776 Generate ICMP destination unreachable: communication
777 with destination network administratively prohibited */
778 Message(M_NOTICE,"%s: denied forwarding for packet (s=%s, d=%s)\n",
781 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE,
782 ICMP_CODE_NET_PROHIBITED, icmp_noinfo);
785 if (best_quality>0) {
786 netlink_client_deliver(st,st->routes[best_match],
788 BUF_ASSERT_FREE(buf);
790 /* Generate ICMP destination unreachable */
791 netlink_icmp_simple(st,sender,buf,
792 ICMP_TYPE_UNREACHABLE,
793 ICMP_CODE_NET_UNREACHABLE,
799 BUF_ASSERT_FREE(buf);
/* Forward a packet that is not addressed to secnet itself.
 * Buffers too short to hold an IP header are dropped silently.  An ICMP
 * time-exceeded error can be generated through netlink_icmp_simple();
 * otherwise the IP header checksum is recomputed and the packet is handed
 * to netlink_packet_deliver().  The buffer must be in use on entry and is
 * asserted free after delivery. */
802 static void netlink_packet_forward(struct netlink *st,
803 struct netlink_client *sender,
804 struct buffer_if *buf)
806 if (buf->size < (int)sizeof(struct iphdr)) return;
807 struct iphdr *iph=(struct iphdr *)buf->start;
809 BUF_ASSERT_USED(buf);
811 /* Packet has already been checked */
813 /* Generate ICMP time exceeded */
814 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_TIME_EXCEEDED,
815 ICMP_CODE_TTL_EXCEEDED,icmp_noinfo);
/* Header checksum is recomputed before delivery, presumably because the
 * TTL field was changed -- TODO confirm. */
821 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
823 netlink_packet_deliver(st,sender,buf);
824 BUF_ASSERT_FREE(buf);
827 /* Deal with packets addressed explicitly to us.
 *
 * Packets shorter than a struct icmphdr, and fragmented packets, are
 * ignored with a warning (for fragments only the first one is reported).
 * An ICMP echo request with code 0 is answered by rewriting the same buffer
 * into an echo reply and delivering it.  Other ICMP is logged as unknown.
 * An ICMP protocol-unreachable error is generated through
 * netlink_icmp_simple(), presumably for non-ICMP protocols -- TODO confirm
 * the branch structure. */
828 static void netlink_packet_local(struct netlink *st,
829 struct netlink_client *sender,
830 struct buffer_if *buf)
836 if (buf->size < (int)sizeof(struct icmphdr)) {
837 Message(M_WARNING,"%s: short packet addressed to secnet; "
838 "ignoring it\n",st->name);
842 h=(struct icmphdr *)buf->start;
844 unsigned fraginfo = ntohs(h->iph.frag);
/* Either a non-zero offset or the more-fragments flag marks a fragment. */
845 if ((fraginfo&(IPHDR_FRAG_OFF|IPHDR_FRAG_MORE))!=0) {
846 if (!(fraginfo & IPHDR_FRAG_OFF))
847 /* report only for first fragment */
848 Message(M_WARNING,"%s: fragmented packet addressed to secnet; "
849 "ignoring it\n",st->name);
/* IP protocol number 1 is ICMP. */
854 if (h->iph.protocol==1) {
856 if (h->type==ICMP_TYPE_ECHO_REQUEST && h->code==0) {
857 /* ICMP echo-request. Special case: we re-use the buffer
858 to construct the reply. */
859 h->type=ICMP_TYPE_ECHO_REPLY;
/* Reverse direction: reply to the original source, from our own address. */
860 h->iph.daddr=h->iph.saddr;
861 h->iph.saddr=htonl(st->secnet_address);
/* The addresses changed, so the IP header checksum is recomputed, followed
 * by the ICMP checksum. */
864 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
865 netlink_icmp_csum(h);
/* sender is NULL here: the reply counts as an internally generated packet. */
866 netlink_packet_deliver(st,NULL,buf);
869 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
871 /* Send ICMP protocol unreachable */
872 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE,
873 ICMP_CODE_PROTOCOL_UNREACHABLE,icmp_noinfo);
881 /* If sender==NULL packet is from host, otherwise sender specifies which tunnel
883 static void netlink_incoming(struct netlink *st, struct netlink_client *sender,
884 struct buffer_if *buf)
886 uint32_t source,dest;
889 const char *sourcedesc=sender?sender->name:"host";
891 BUF_ASSERT_USED(buf);
893 if (!netlink_check(st,buf,errmsgbuf,sizeof(errmsgbuf))) {
894 Message(M_WARNING,"%s: bad IP packet from %s: %s\n",
900 assert(buf->size >= (int)sizeof(struct iphdr));
901 iph=(struct iphdr *)buf->start;
903 source=ntohl(iph->saddr);
904 dest=ntohl(iph->daddr);
906 /* Check source. If we don't like the source, there's no point
907 generating ICMP because we won't know how to get it to the
908 source of the packet. */
910 /* Check that the packet source is appropriate for the tunnel
912 if (!ipset_contains_addr(sender->networks,source)) {
914 s=ipaddr_to_string(source);
915 d=ipaddr_to_string(dest);
916 Message(M_WARNING,"%s: packet from tunnel %s with bad "
917 "source address (s=%s,d=%s)\n",st->name,sender->name,s,d);
922 /* Check that the packet originates in our configured local
923 network, and hasn't been forwarded from elsewhere or
924 generated with the wrong source address */
925 if (!ipset_contains_addr(st->networks,source)) {
927 s=ipaddr_to_string(source);
928 d=ipaddr_to_string(dest);
929 Message(M_WARNING,"%s: outgoing packet with bad source address "
930 "(s=%s,d=%s)\n",st->name,s,d);
936 /* If this is a point-to-point device we don't examine the
937 destination address at all; we blindly send it down our
938 one-and-only registered tunnel, or to the host, depending on
939 where it came from. It's up to external software to check
940 address validity and generate ICMP, etc. */
943 netlink_host_deliver(st,sender,source,dest,buf);
945 netlink_client_deliver(st,st->clients,source,dest,buf);
947 BUF_ASSERT_FREE(buf);
951 /* st->secnet_address needs checking before matching destination
953 if (dest==st->secnet_address) {
954 netlink_packet_local(st,sender,buf);
955 BUF_ASSERT_FREE(buf);
958 netlink_packet_forward(st,sender,buf);
959 BUF_ASSERT_FREE(buf);
962 static void netlink_inst_incoming(void *sst, struct buffer_if *buf)
964 struct netlink_client *c=sst;
965 struct netlink *st=c->nst;
967 netlink_incoming(st,c,buf);
970 static void netlink_dev_incoming(void *sst, struct buffer_if *buf)
972 struct netlink *st=sst;
974 netlink_incoming(st,NULL,buf);
977 static void netlink_set_quality(void *sst, uint32_t quality)
979 struct netlink_client *c=sst;
980 struct netlink *st=c->nst;
982 c->link_quality=quality;
983 c->up=(c->link_quality==LINK_QUALITY_DOWN)?False:True;
984 if (c->options&OPT_SOFTROUTE) {
985 st->set_routes(st->dst,c);
989 static void netlink_output_subnets(struct netlink *st, uint32_t loglevel,
990 struct subnet_list *snets)
995 for (i=0; i<snets->entries; i++) {
996 net=subnet_to_string(snets->list[i]);
997 Message(loglevel,"%s ",net);
1001 static void netlink_dump_routes(struct netlink *st, bool_t requested)
1007 if (requested) c=M_WARNING;
1009 net=ipaddr_to_string(st->secnet_address);
1010 Message(c,"%s: point-to-point (remote end is %s); routes: ",
1012 netlink_output_subnets(st,c,st->clients->subnets);
1015 Message(c,"%s: routing table:\n",st->name);
1016 for (i=0; i<st->n_clients; i++) {
1017 netlink_output_subnets(st,c,st->routes[i]->subnets);
1018 Message(c,"-> tunnel %s (%s,mtu %d,%s routes,%s,"
1019 "quality %d,use %d,pri %lu)\n",
1020 st->routes[i]->name,
1021 st->routes[i]->up?"up":"down",
1023 st->routes[i]->options&OPT_SOFTROUTE?"soft":"hard",
1024 st->routes[i]->options&OPT_ALLOWROUTE?"free":"restricted",
1025 st->routes[i]->link_quality,
1026 st->routes[i]->outcount,
1027 (unsigned long)st->routes[i]->priority);
1029 net=ipaddr_to_string(st->secnet_address);
1030 Message(c,"%s/32 -> netlink \"%s\" (use %d)\n",
1031 net,st->name,st->localcount);
1032 for (i=0; i<st->subnets->entries; i++) {
1033 net=subnet_to_string(st->subnets->list[i]);
1034 Message(c,"%s ",net);
1037 Message(c,"-> host (use %d)\n",st->outcount);
1041 /* ap is a pointer to a member of the routes array */
/* qsort() comparison callback for the routes array.  Each argument points
 * at an array element, which is itself a pointer to a netlink_client.
 * Equal priorities compare as 0.  A lower priority in a compares as
 * greater (1), which places higher priorities earlier in the sorted array. */
1042 static int netlink_compare_client_priority(const void *ap, const void *bp)
1044 const struct netlink_client *const*a=ap;
1045 const struct netlink_client *const*b=bp;
1047 if ((*a)->priority==(*b)->priority) return 0;
1048 if ((*a)->priority<(*b)->priority) return 1;
/* Phase hook (netlink_init registers it for PHASE_SETUP).  sst is the
 * struct netlink.  Allocates st->routes with st->n_clients entries, walks
 * the linked list of clients to fill it, sorts it with
 * netlink_compare_client_priority, and then dumps the routing table with
 * requested=False. */
1052 static void netlink_phase_hook(void *sst, uint32_t new_phase)
1054 struct netlink *st=sst;
1055 struct netlink_client *c;
1058 /* All the networks serviced by the various tunnels should now
1059 * have been registered. We build a routing table by sorting the
1060 * clients by priority. */
1061 NEW_ARY(st->routes,st->n_clients);
1062 /* Fill the table */
1064 for (c=st->clients; c; c=c->next) {
1068 /* Sort the table in descending order of priority */
1069 qsort(st->routes,st->n_clients,sizeof(*st->routes),
1070 netlink_compare_client_priority);
1072 netlink_dump_routes(st,False);
/* Signal callback (netlink_init registers it for SIGUSR1).  sst is the
 * struct netlink.  Logs that a dump was requested and then dumps the
 * routing table with requested=True, which makes netlink_dump_routes log
 * at M_WARNING. */
1075 static void netlink_signal_handler(void *sst, int signum)
1077 struct netlink *st=sst;
1078 Message(M_INFO,"%s: route dump requested by SIGUSR1\n",st->name);
1079 netlink_dump_routes(st,True);
1082 static void netlink_inst_set_mtu(void *sst, int32_t new_mtu)
1084 struct netlink_client *c=sst;
1089 static void netlink_inst_reg(void *sst, netlink_deliver_fn *deliver,
1090 void *dst, uint32_t *localmtu_r)
1092 struct netlink_client *c=sst;
1093 struct netlink *st=c->nst;
1099 *localmtu_r=st->mtu;
1102 static struct flagstr netlink_option_table[]={
1103 { "soft", OPT_SOFTROUTE },
1104 { "allow-route", OPT_ALLOWROUTE },
1107 /* This is the routine that gets called when the closure that's
1108 returned by an invocation of a netlink device closure (eg. tun,
1109 userv-ipif) is invoked. It's used to create routes and pass in
1110 information about them; the closure it returns is used by site
1112 static closure_t *netlink_inst_create(struct netlink *st,
1113 struct cloc loc, dict_t *dict)
1115 struct netlink_client *c;
1117 struct ipset *networks;
1118 uint32_t options,priority;
1122 name=dict_read_string(dict, "name", True, st->name, loc);
1124 l=dict_lookup(dict,"routes");
1126 cfgfatal(loc,st->name,"required parameter \"routes\" not found\n");
1127 networks=string_list_to_ipset(l,loc,st->name,"routes");
1128 options=string_list_to_word(dict_lookup(dict,"options"),
1129 netlink_option_table,st->name);
1131 priority=dict_read_number(dict,"priority",False,st->name,loc,0);
1132 mtu=dict_read_number(dict,"mtu",False,st->name,loc,0);
1134 if ((options&OPT_SOFTROUTE) && !st->set_routes) {
1135 cfgfatal(loc,st->name,"this netlink device does not support "
1140 if (options&OPT_SOFTROUTE) {
1141 /* XXX for now we assume that soft routes require root privilege;
1142 this may not always be true. The device driver can tell us. */
1143 require_root_privileges=True;
1144 require_root_privileges_explanation="netlink: soft routes";
1146 cfgfatal(loc,st->name,"point-to-point netlinks do not support "
1152 /* Check that nets are a subset of st->remote_networks;
1153 refuse to register if they are not. */
1154 if (!ipset_is_subset(st->remote_networks,networks)) {
1155 cfgfatal(loc,st->name,"routes are not allowed\n");
1160 c->cl.description=name;
1161 c->cl.type=CL_NETLINK;
1163 c->cl.interface=&c->ops;
1165 c->ops.reg=netlink_inst_reg;
1166 c->ops.deliver=netlink_inst_incoming;
1167 c->ops.set_quality=netlink_set_quality;
1168 c->ops.set_mtu=netlink_inst_set_mtu;
1171 c->networks=networks;
1172 c->subnets=ipset_to_subnet_list(networks);
1173 c->priority=priority;
1177 c->link_quality=LINK_QUALITY_UNUSED;
1178 c->mtu=mtu?mtu:st->mtu;
1183 c->next=st->clients;
1185 assert(st->n_clients < INT_MAX);
1191 static list_t *netlink_inst_apply(closure_t *self, struct cloc loc,
1192 dict_t *context, list_t *args)
1194 struct netlink *st=self->interface;
1200 item=list_elem(args,0);
1201 if (!item || item->type!=t_dict) {
1202 cfgfatal(loc,st->name,"must have a dictionary argument\n");
1204 dict=item->data.dict;
1206 cl=netlink_inst_create(st,loc,dict);
1208 return new_closure(cl);
1211 netlink_deliver_fn *netlink_init(struct netlink *st,
1212 void *dst, struct cloc loc,
1213 dict_t *dict, cstring_t description,
1214 netlink_route_fn *set_routes,
1215 netlink_deliver_fn *to_host)
1221 st->cl.description=description;
1222 st->cl.type=CL_PURE;
1223 st->cl.apply=netlink_inst_apply;
1224 st->cl.interface=st;
1228 st->set_routes=set_routes;
1229 st->deliver_to_host=to_host;
1231 st->name=dict_read_string(dict,"name",False,description,loc);
1232 if (!st->name) st->name=description;
1233 l=dict_lookup(dict,"networks");
1235 st->networks=string_list_to_ipset(l,loc,st->name,"networks");
1237 struct ipset *empty;
1239 st->networks=ipset_complement(empty);
1242 l=dict_lookup(dict,"remote-networks");
1244 st->remote_networks=string_list_to_ipset(l,loc,st->name,
1247 struct ipset *empty;
1249 st->remote_networks=ipset_complement(empty);
1252 st->local_address=string_item_to_ipaddr(
1253 dict_find_item(dict,"local-address", True, "netlink", loc),"netlink");
1255 sa=dict_find_item(dict,"secnet-address",False,"netlink",loc);
1256 ptpa=dict_find_item(dict,"ptp-address",False,"netlink",loc);
1258 cfgfatal(loc,st->name,"you may not specify secnet-address and "
1259 "ptp-address in the same netlink device\n");
1261 if (!(sa || ptpa)) {
1262 cfgfatal(loc,st->name,"you must specify secnet-address or "
1263 "ptp-address for this netlink device\n");
1266 st->secnet_address=string_item_to_ipaddr(sa,"netlink");
1269 st->secnet_address=string_item_to_ipaddr(ptpa,"netlink");
1272 /* To be strictly correct we could subtract secnet_address from
1273 networks here. It shouldn't make any practical difference,
1274 though, and will make the route dump look complicated... */
1275 st->subnets=ipset_to_subnet_list(st->networks);
1276 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
1277 buffer_new(&st->icmp,MAX(ICMP_BUFSIZE,st->mtu));
1281 add_hook(PHASE_SETUP,netlink_phase_hook,st);
1282 request_signal_notification(SIGUSR1, netlink_signal_handler, st);
1284 /* If we're point-to-point then we return a CL_NETLINK directly,
1285 rather than a CL_NETLINK_OLD or pure closure (depending on
1286 compatibility). This CL_NETLINK is for our one and only
1287 client. Our cl.apply function is NULL. */
1290 cl=netlink_inst_create(st,loc,dict);
1293 return netlink_dev_incoming;
1296 /* No connection to the kernel at all... */
/* Route-setting callback handed to netlink_init() by the null netlink.
 * sst is the struct null.  When the up flag of the client differs from its
 * kup flag (presumably the state last applied -- TODO confirm), the change
 * is logged at M_INFO and kup is updated to match up. */
1302 static bool_t null_set_route(void *sst, struct netlink_client *routes)
1304 struct null *st=sst;
1306 if (routes->up!=routes->kup) {
1307 Message(M_INFO,"%s: setting routes for tunnel %s to state %s\n",
1308 st->nl.name,routes->name,
1309 routes->up?"up":"down");
1310 routes->kup=routes->up;
1316 static void null_deliver(void *sst, struct buffer_if *buf)
1321 static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
1330 item=list_elem(args,0);
1331 if (!item || item->type!=t_dict)
1332 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
1334 dict=item->data.dict;
1336 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_set_route,
1339 return new_closure(&st->nl.cl);
/* Module entry point: registers null_apply in dict as the closure named
 * "null-netlink". */
1342 void netlink_module(dict_t *dict)
1344 add_closure(dict,"null-netlink",null_apply);