1 /* User-kernel network link */
3 /* See RFCs 791, 792, 1123 and 1812 */
5 /* The netlink device is actually a router. Tunnels are unnumbered
6 point-to-point lines (RFC1812 section 2.2.7); the router has a
7 single address (the 'router-id'). */
9 /* This is where we currently have the anti-spoofing paranoia - before
10 sending a packet to the kernel we check that the tunnel it came
11 over could reasonably have produced it. */
14 /* Points to note from RFC1812 (which may require changes in this
17 3.3.4 Maximum Transmission Unit - MTU
19 The MTU of each logical interface MUST be configurable within the
20 range of legal MTUs for the interface.
22 Many Link Layer protocols define a maximum frame size that may be
23 sent. In such cases, a router MUST NOT allow an MTU to be set which
24 would allow sending of frames larger than those allowed by the Link
25 Layer protocol. However, a router SHOULD be willing to receive a
26 packet as large as the maximum frame size even if that is larger than
29 4.2.1 A router SHOULD count datagrams discarded.
31 4.2.2.1 Source route options - we probably should implement processing
32 of source routes, even though mostly the security policy will prevent
35 5.3.13.4 Source Route Options
37 A router MUST implement support for source route options in forwarded
38 packets. A router MAY implement a configuration option that, when
39 enabled, causes all source-routed packets to be discarded. However,
40 such an option MUST NOT be enabled by default.
42 5.3.13.5 Record Route Option
44 Routers MUST support the Record Route option in forwarded packets.
46 A router MAY provide a configuration option that, if enabled, will
47 cause the router to ignore (i.e., pass through unchanged) Record
48 Route options in forwarded packets. If provided, such an option MUST
49 default to enabling the record-route. This option should not affect
50 the processing of Record Route options in datagrams received by the
51 router itself (in particular, Record Route options in ICMP echo
52 requests will still be processed according to Section [4.3.3.6]).
54 5.3.13.6 Timestamp Option
56 Routers MUST support the timestamp option in forwarded packets. A
57 timestamp value MUST follow the rules given [INTRO:2].
59 If the flags field = 3 (timestamp and prespecified address), the
60 router MUST add its timestamp if the next prespecified address
61 matches any of the router's IP addresses. It is not necessary that
62 the prespecified address be either the address of the interface on
63 which the packet arrived or the address of the interface over which
67 4.2.2.7 Fragmentation: RFC 791 Section 3.2
69 Fragmentation, as described in [INTERNET:1], MUST be supported by a
72 4.2.2.8 Reassembly: RFC 791 Section 3.2
74 As specified in the corresponding section of [INTRO:2], a router MUST
75 support reassembly of datagrams that it delivers to itself.
77 4.2.2.9 Time to Live: RFC 791 Section 3.2
79 Note in particular that a router MUST NOT check the TTL of a packet
80 except when forwarding it.
82 A router MUST NOT discard a datagram just because it was received
83 with TTL equal to zero or one; if it is to the router and otherwise
84 valid, the router MUST attempt to receive it.
86 On messages the router originates, the IP layer MUST provide a means
87 for the transport layer to set the TTL field of every datagram that
88 is sent. When a fixed TTL value is used, it MUST be configurable.
91 8.1 The Simple Network Management Protocol - SNMP
92 8.1.1 SNMP Protocol Elements
94 Routers MUST be manageable by SNMP [MGT:3]. The SNMP MUST operate
95 using UDP/IP as its transport and network protocols.
110 #define MDEBUG(...) Message(M_DEBUG, __VA_ARGS__)
111 #else /* !NETLINK_DEBUG */
112 #define MDEBUG(...) ((void)0)
113 #endif /* !NETLINK_DEBUG */
115 #define ICMP_TYPE_ECHO_REPLY 0
117 #define ICMP_TYPE_UNREACHABLE 3
118 #define ICMP_CODE_NET_UNREACHABLE 0
119 #define ICMP_CODE_PROTOCOL_UNREACHABLE 2
120 #define ICMP_CODE_FRAGMENTATION_REQUIRED 4
121 #define ICMP_CODE_NET_PROHIBITED 13
123 #define ICMP_TYPE_ECHO_REQUEST 8
125 #define ICMP_TYPE_TIME_EXCEEDED 11
126 #define ICMP_CODE_TTL_EXCEEDED 0
128 /* Generic IP checksum routine */
129 static inline uint16_t ip_csum(const uint8_t *iph,int32_t count)
131 register uint32_t sum=0;
134 sum+=ntohs(*(uint16_t *)iph);
139 sum+=*(uint8_t *)iph;
141 sum=(sum&0xffff)+(sum>>16);
147 * This is a version of ip_compute_csum() optimized for IP headers,
148 * which always checksum on 4 octet boundaries.
150 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
153 static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl) {
156 __asm__ __volatile__(
162 "adcl 12(%1), %0 ;\n"
163 "1: adcl 16(%1), %0 ;\n"
174 /* Since the input registers which are loaded with iph and ihl
175 are modified, we must also specify them as outputs, or gcc
176 will assume they contain their original values. */
177 : "=r" (sum), "=r" (iph), "=r" (ihl)
178 : "1" (iph), "2" (ihl)
183 static inline uint16_t ip_fast_csum(uint8_t *iph, int32_t ihl)
185 assert(ihl < INT_MAX/4);
186 return ip_csum(iph,ihl*4);
191 #if defined (WORDS_BIGENDIAN)
202 #define IPHDR_FRAG_OFF ((uint16_t)0x1fff)
203 #define IPHDR_FRAG_MORE ((uint16_t)0x2000)
204 #define IPHDR_FRAG_DONT ((uint16_t)0x4000)
205 /* reserved 0x8000 */
211 /* The options start here. */
219 union icmpinfofield {
238 static const union icmpinfofield icmp_noinfo;
240 static const char *sender_name(struct netlink_client *sender /* or NULL */)
242 return sender?sender->name:"(local)";
245 static void netlink_packet_deliver(struct netlink *st,
246 struct netlink_client *client,
247 struct buffer_if *buf);
249 /* XXX RFC1812 4.3.2.5:
250 All other ICMP error messages (Destination Unreachable,
251 Redirect, Time Exceeded, and Parameter Problem) SHOULD have their
252 precedence value set to 6 (INTERNETWORK CONTROL) or 7 (NETWORK
253 CONTROL). The IP Precedence value for these error messages MAY be
256 static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
257 uint32_t dest,uint16_t len)
261 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
262 buffer_init(&st->icmp,calculate_max_start_pad());
263 h=buf_append(&st->icmp,sizeof(*h));
268 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
271 h->iph.ttl=255; /* XXX should be configurable */
273 h->iph.saddr=htonl(st->secnet_address);
274 h->iph.daddr=htonl(dest);
276 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
283 /* Fill in the ICMP checksum field correctly */
284 static void netlink_icmp_csum(struct icmphdr *h)
288 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
290 h->check=ip_csum(&h->type,len);
294 * An ICMP error message MUST NOT be sent as the result of
297 * * an ICMP error message, or
299 * * a datagram destined to an IP broadcast or IP multicast
302 * * a datagram sent as a link-layer broadcast, or
304 * * a non-initial fragment, or
306 * * a datagram whose source address does not define a single
307 * host -- e.g., a zero address, a loopback address, a
308 * broadcast address, a multicast address, or a Class E
311 static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
314 struct icmphdr *icmph;
317 if (buf->size < (int)sizeof(struct icmphdr)) return False;
318 iph=(struct iphdr *)buf->start;
319 icmph=(struct icmphdr *)buf->start;
320 if (iph->protocol==1) {
321 switch(icmph->type) {
322 /* Based on http://www.iana.org/assignments/icmp-parameters/icmp-parameters.xhtml#icmp-parameters-types
323 * as retrieved Thu, 20 Mar 2014 00:16:44 +0000.
324 * Deprecated, reserved, unassigned and experimental
325 * options are treated as not safe to reply to.
327 case 0: /* Echo Reply */
329 case 13: /* Timestamp */
330 case 14: /* Timestamp Reply */
336 /* How do we spot broadcast destination addresses? */
337 if (ntohs(iph->frag)&IPHDR_FRAG_OFF) return False;
338 source=ntohl(iph->saddr);
339 if (source==0) return False;
340 if ((source&0xff000000)==0x7f000000) return False;
341 /* How do we spot broadcast source addresses? */
342 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
343 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
347 /* How much of the original IP packet do we include in its ICMP
348 response? The header plus up to 64 bits. */
351 4.3.2.3 Original Message Header
353 Historically, every ICMP error message has included the Internet
354 header and at least the first 8 data bytes of the datagram that
355 triggered the error. This is no longer adequate, due to the use of
356 IP-in-IP tunneling and other technologies. Therefore, the ICMP
357 datagram SHOULD contain as much of the original datagram as possible
358 without the length of the ICMP datagram exceeding 576 bytes. The
359 returned IP header (and user data) MUST be identical to that which
360 was received, except that the router is not required to undo any
361 modifications to the IP header that are normally performed in
362 forwarding that were performed before the error was detected (e.g.,
363 decrementing the TTL, or updating options). Note that the
364 requirements of Section [4.3.3.5] supersede this requirement in some
365 cases (i.e., for a Parameter Problem message, if the problem is in a
366 modified field, the router must undo the modification). See Section
369 static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
371 if (buf->size < (int)sizeof(struct iphdr)) return 0;
372 struct iphdr *iph=(struct iphdr *)buf->start;
376 /* We include the first 8 bytes of the packet data, provided they exist */
378 plen=ntohs(iph->tot_len);
379 return (hlen>plen?plen:hlen);
382 /* origsender indicates where the packet we're constructing a response
383 to comes from. NULL indicates the host. */
384 static void netlink_icmp_simple(struct netlink *st,
385 struct netlink_client *origsender,
386 struct buffer_if *buf,
387 uint8_t type, uint8_t code,
388 union icmpinfofield info)
393 if (netlink_icmp_may_reply(buf)) {
394 struct iphdr *iph=(struct iphdr *)buf->start;
395 len=netlink_icmp_reply_len(buf);
396 h=netlink_icmp_tmpl(st,ntohl(iph->saddr),len);
397 h->type=type; h->code=code; h->d=info;
398 memcpy(buf_append(&st->icmp,len),buf->start,len);
399 netlink_icmp_csum(h);
400 netlink_packet_deliver(st,NULL,&st->icmp);
401 BUF_ASSERT_FREE(&st->icmp);
406 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
408 * RFC1812: 4.2.2.5 MUST discard messages containing invalid checksums.
410 * Is the datagram acceptable?
412 * 1. Length at least the size of an ip header
414 * 3. Checksums correctly.
415 * 4. Doesn't have a bogus length
417 static bool_t netlink_check(struct netlink *st, struct buffer_if *buf,
418 char *errmsgbuf, int errmsgbuflen)
420 #define BAD(...) do{ \
421 snprintf(errmsgbuf,errmsgbuflen,__VA_ARGS__); \
425 if (buf->size < (int)sizeof(struct iphdr)) BAD("len %"PRIu32"",buf->size);
426 struct iphdr *iph=(struct iphdr *)buf->start;
429 if (iph->ihl < 5) BAD("ihl %u",iph->ihl);
430 if (iph->version != 4) BAD("version %u",iph->version);
431 if (buf->size < iph->ihl*4) BAD("size %"PRId32"<%u*4",buf->size,iph->ihl);
432 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) BAD("csum");
433 len=ntohs(iph->tot_len);
434 /* There should be no padding */
435 if (buf->size!=len) BAD("len %"PRId32"!=%"PRId32,buf->size,len);
436 if (len<(iph->ihl<<2)) BAD("len %"PRId32"<(%u<<2)",len,iph->ihl);
437 /* XXX check that there's no source route specified */
443 static const char *fragment_filter_header(uint8_t *base, long *hlp)
445 const int fixedhl = sizeof(struct iphdr);
447 const uint8_t *ipend = base + hl;
448 uint8_t *op = base + fixedhl;
449 const uint8_t *ip = op;
453 int remain = ipend - ip;
454 if (opt == 0x00) /* End of Options List */ break;
455 if (opt == 0x01) /* No Operation */ continue;
456 if (remain < 2) return "IPv4 options truncated at length";
458 if (remain < optlen) return "IPv4 options truncated in option";
459 if (opt & 0x80) /* copy */ {
460 memmove(op, ip, optlen);
465 while ((hl = (op - base)) & 0x3)
466 *op++ = 0x00 /* End of Option List */;
467 ((struct iphdr*)base)->ihl = hl >> 2;
473 /* Fragment or send ICMP Fragmentation Needed */
474 static void netlink_maybe_fragment(struct netlink *st,
475 struct netlink_client *sender,
476 netlink_deliver_fn *deliver,
478 const char *delivery_name,
480 uint32_t source, uint32_t dest,
481 struct buffer_if *buf)
483 struct iphdr *iph=(struct iphdr*)buf->start;
484 long hl = iph->ihl*4;
485 const char *ssource = ipaddr_to_string(source);
487 if (buf->size <= mtu) {
488 deliver(deliver_dst, buf);
492 MDEBUG("%s: fragmenting %s->%s org.size=%"PRId32"\n",
493 st->name, ssource, delivery_name, buf->size);
495 #define BADFRAG(m, ...) \
497 "%s: fragmenting packet from source %s" \
498 " for transmission via %s: " m "\n", \
499 st->name, ssource, delivery_name, \
502 unsigned orig_frag = ntohs(iph->frag);
504 if (orig_frag&IPHDR_FRAG_DONT) {
505 union icmpinfofield info =
506 { .fragneeded = { .unused = 0, .mtu = htons(mtu) } };
507 netlink_icmp_simple(st,sender,buf,
508 ICMP_TYPE_UNREACHABLE,
509 ICMP_CODE_FRAGMENTATION_REQUIRED,
515 BADFRAG("mtu %"PRId32" too small", mtu);
520 /* we (ab)use the icmp buffer to stash the original packet */
521 struct buffer_if *orig = &st->icmp;
522 BUF_ALLOC(orig,"netlink_client_deliver fragment orig");
523 buffer_copy(orig,buf);
526 const uint8_t *startindata = orig->start + hl;
527 const uint8_t *indata = startindata;
528 const uint8_t *endindata = orig->start + orig->size;
532 /* compute our fragment offset */
533 long dataoffset = indata - startindata
534 + (orig_frag & IPHDR_FRAG_OFF)*8;
535 assert(!(dataoffset & 7));
536 if (dataoffset > IPHDR_FRAG_OFF*8) {
537 BADFRAG("ultimate fragment offset out of range");
541 BUF_ALLOC(buf,"netlink_client_deliver fragment frag");
542 buffer_init(buf,calculate_max_start_pad());
544 /* copy header (possibly filtered); will adjust in a bit */
545 struct iphdr *fragh = buf_append(buf, hl);
546 memcpy(fragh, orig->start, hl);
548 /* decide how much payload to copy and copy it */
549 long avail = mtu - hl;
550 long remain = endindata - indata;
551 long use = avail < remain ? (avail & ~(long)7) : remain;
552 memcpy(buf_append(buf, use), indata, use);
555 _Bool last_frag = indata >= endindata;
557 /* adjust the header */
558 fragh->tot_len = htons(buf->size);
560 htons((orig_frag & ~IPHDR_FRAG_OFF) |
561 (last_frag ? 0 : IPHDR_FRAG_MORE) |
564 fragh->check = ip_fast_csum((const void*)fragh, fragh->ihl);
566 /* actually send it */
567 deliver(deliver_dst, buf);
571 /* after copying the header for the first frag,
572 * we filter the header for the remaining frags */
574 const char *bad = fragment_filter_header(orig->start, &hl);
575 if (bad) { BADFRAG("%s", bad); break; }
584 /* Deliver a packet _to_ client; used after we have decided
585 * what to do with it (and just to check that the client has
586 * actually registered a delivery function with us). */
587 static void netlink_client_deliver(struct netlink *st,
588 struct netlink_client *client,
589 uint32_t source, uint32_t dest,
590 struct buffer_if *buf)
592 if (!client->deliver) {
594 s=ipaddr_to_string(source);
595 d=ipaddr_to_string(dest);
596 Message(M_ERR,"%s: dropping %s->%s, client not registered\n",
602 netlink_maybe_fragment(st,NULL, client->deliver,client->dst,client->name,
603 client->mtu, source,dest,buf);
607 /* Deliver a packet to the host; used after we have decided that that
608 * is what to do with it. */
609 static void netlink_host_deliver(struct netlink *st,
610 struct netlink_client *sender,
611 uint32_t source, uint32_t dest,
612 struct buffer_if *buf)
614 netlink_maybe_fragment(st,sender, st->deliver_to_host,st->dst,"(host)",
615 st->mtu, source,dest,buf);
619 /* Deliver a packet. "sender"==NULL for packets from the host and packets
620 generated internally in secnet. */
621 static void netlink_packet_deliver(struct netlink *st,
622 struct netlink_client *sender,
623 struct buffer_if *buf)
625 if (buf->size < (int)sizeof(struct iphdr)) {
626 Message(M_ERR,"%s: trying to deliver a too-short packet"
627 " from %s!\n",st->name, sender_name(sender));
632 struct iphdr *iph=(struct iphdr *)buf->start;
633 uint32_t dest=ntohl(iph->daddr);
634 uint32_t source=ntohl(iph->saddr);
635 uint32_t best_quality;
636 bool_t allow_route=False;
637 bool_t found_allowed=False;
641 BUF_ASSERT_USED(buf);
643 if (dest==st->secnet_address) {
644 Message(M_ERR,"%s: trying to deliver a packet to myself!\n",st->name);
649 /* Packets from the host (sender==NULL) may always be routed. Packets
650 from clients with the allow_route option will also be routed. */
651 if (!sender || (sender && (sender->options & OPT_ALLOWROUTE)))
654 /* If !allow_route, we check the routing table anyway, and if
655 there's a suitable route with OPT_ALLOWROUTE set we use it. If
656 there's a suitable route, but none with OPT_ALLOWROUTE set then
657 we generate ICMP 'communication with destination network
658 administratively prohibited'. */
662 for (i=0; i<st->n_clients; i++) {
663 if (st->routes[i]->up &&
664 ipset_contains_addr(st->routes[i]->networks,dest)) {
665 /* It's an available route to the correct destination. But is
666 it better than the one we already have? */
668 /* If we have already found an allowed route then we don't
669 bother looking at routes we're not allowed to use. If
670 we don't yet have an allowed route we'll consider any. */
671 if (!allow_route && found_allowed) {
672 if (!(st->routes[i]->options&OPT_ALLOWROUTE)) continue;
675 if (st->routes[i]->link_quality>best_quality
676 || best_quality==0) {
677 best_quality=st->routes[i]->link_quality;
679 if (st->routes[i]->options&OPT_ALLOWROUTE)
681 /* If quality isn't perfect we may wish to
682 consider kicking the tunnel with a 0-length
683 packet to prompt it to perform a key setup.
684 Then it'll eventually decide it's up or
686 /* If quality is perfect and we're allowed to use the
687 route we don't need to search any more. */
688 if (best_quality>=MAXIMUM_LINK_QUALITY &&
689 (allow_route || found_allowed)) break;
693 if (best_match==-1) {
694 /* The packet's not going down a tunnel. It might (ought to)
696 if (ipset_contains_addr(st->networks,dest)) {
697 netlink_host_deliver(st,sender,source,dest,buf);
698 BUF_ASSERT_FREE(buf);
701 s=ipaddr_to_string(source);
702 d=ipaddr_to_string(dest);
703 Message(M_DEBUG,"%s: don't know where to deliver packet "
704 "(s=%s, d=%s)\n", st->name, s, d);
706 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE,
707 ICMP_CODE_NET_UNREACHABLE, icmp_noinfo);
712 !(st->routes[best_match]->options&OPT_ALLOWROUTE)) {
714 s=ipaddr_to_string(source);
715 d=ipaddr_to_string(dest);
716 /* We have a usable route but aren't allowed to use it.
717 Generate ICMP destination unreachable: communication
718 with destination network administratively prohibited */
719 Message(M_NOTICE,"%s: denied forwarding for packet (s=%s, d=%s)\n",
723 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE,
724 ICMP_CODE_NET_PROHIBITED, icmp_noinfo);
727 if (best_quality>0) {
728 netlink_client_deliver(st,st->routes[best_match],
730 BUF_ASSERT_FREE(buf);
732 /* Generate ICMP destination unreachable */
733 netlink_icmp_simple(st,sender,buf,
734 ICMP_TYPE_UNREACHABLE,
735 ICMP_CODE_NET_UNREACHABLE,
741 BUF_ASSERT_FREE(buf);
744 static void netlink_packet_forward(struct netlink *st,
745 struct netlink_client *sender,
746 struct buffer_if *buf)
748 if (buf->size < (int)sizeof(struct iphdr)) return;
749 struct iphdr *iph=(struct iphdr *)buf->start;
751 BUF_ASSERT_USED(buf);
753 /* Packet has already been checked */
755 /* Generate ICMP time exceeded */
756 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_TIME_EXCEEDED,
757 ICMP_CODE_TTL_EXCEEDED,icmp_noinfo);
763 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
765 netlink_packet_deliver(st,sender,buf);
766 BUF_ASSERT_FREE(buf);
769 /* Deal with packets addressed explicitly to us */
770 static void netlink_packet_local(struct netlink *st,
771 struct netlink_client *sender,
772 struct buffer_if *buf)
778 if (buf->size < (int)sizeof(struct icmphdr)) {
779 Message(M_WARNING,"%s: short packet addressed to secnet; "
780 "ignoring it\n",st->name);
784 h=(struct icmphdr *)buf->start;
786 unsigned fraginfo = ntohs(h->iph.frag);
787 if ((fraginfo&(IPHDR_FRAG_OFF|IPHDR_FRAG_MORE))!=0) {
788 if (!(fraginfo & IPHDR_FRAG_OFF))
789 /* report only for first fragment */
790 Message(M_WARNING,"%s: fragmented packet addressed to secnet; "
791 "ignoring it\n",st->name);
796 if (h->iph.protocol==1) {
798 if (h->type==ICMP_TYPE_ECHO_REQUEST && h->code==0) {
799 /* ICMP echo-request. Special case: we re-use the buffer
800 to construct the reply. */
801 h->type=ICMP_TYPE_ECHO_REPLY;
802 h->iph.daddr=h->iph.saddr;
803 h->iph.saddr=htonl(st->secnet_address);
806 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
807 netlink_icmp_csum(h);
808 netlink_packet_deliver(st,NULL,buf);
811 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
813 /* Send ICMP protocol unreachable */
814 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE,
815 ICMP_CODE_PROTOCOL_UNREACHABLE,icmp_noinfo);
823 /* If sender==NULL packet is from host, otherwise sender specifies which tunnel
825 static void netlink_incoming(struct netlink *st, struct netlink_client *sender,
826 struct buffer_if *buf)
828 uint32_t source,dest;
831 const char *sourcedesc=sender?sender->name:"host";
833 BUF_ASSERT_USED(buf);
835 if (!netlink_check(st,buf,errmsgbuf,sizeof(errmsgbuf))) {
836 Message(M_WARNING,"%s: bad IP packet from %s: %s\n",
842 assert(buf->size >= (int)sizeof(struct iphdr));
843 iph=(struct iphdr *)buf->start;
845 source=ntohl(iph->saddr);
846 dest=ntohl(iph->daddr);
848 /* Check source. If we don't like the source, there's no point
849 generating ICMP because we won't know how to get it to the
850 source of the packet. */
852 /* Check that the packet source is appropriate for the tunnel
854 if (!ipset_contains_addr(sender->networks,source)) {
856 s=ipaddr_to_string(source);
857 d=ipaddr_to_string(dest);
858 Message(M_WARNING,"%s: packet from tunnel %s with bad "
859 "source address (s=%s,d=%s)\n",st->name,sender->name,s,d);
865 /* Check that the packet originates in our configured local
866 network, and hasn't been forwarded from elsewhere or
867 generated with the wrong source address */
868 if (!ipset_contains_addr(st->networks,source)) {
870 s=ipaddr_to_string(source);
871 d=ipaddr_to_string(dest);
872 Message(M_WARNING,"%s: outgoing packet with bad source address "
873 "(s=%s,d=%s)\n",st->name,s,d);
880 /* If this is a point-to-point device we don't examine the
881 destination address at all; we blindly send it down our
882 one-and-only registered tunnel, or to the host, depending on
883 where it came from. It's up to external software to check
884 address validity and generate ICMP, etc. */
887 netlink_host_deliver(st,sender,source,dest,buf);
889 netlink_client_deliver(st,st->clients,source,dest,buf);
891 BUF_ASSERT_FREE(buf);
895 /* st->secnet_address needs checking before matching destination
897 if (dest==st->secnet_address) {
898 netlink_packet_local(st,sender,buf);
899 BUF_ASSERT_FREE(buf);
902 netlink_packet_forward(st,sender,buf);
903 BUF_ASSERT_FREE(buf);
906 static void netlink_inst_incoming(void *sst, struct buffer_if *buf)
908 struct netlink_client *c=sst;
909 struct netlink *st=c->nst;
911 netlink_incoming(st,c,buf);
914 static void netlink_dev_incoming(void *sst, struct buffer_if *buf)
916 struct netlink *st=sst;
918 netlink_incoming(st,NULL,buf);
921 static void netlink_set_quality(void *sst, uint32_t quality)
923 struct netlink_client *c=sst;
924 struct netlink *st=c->nst;
926 c->link_quality=quality;
927 c->up=(c->link_quality==LINK_QUALITY_DOWN)?False:True;
928 if (c->options&OPT_SOFTROUTE) {
929 st->set_routes(st->dst,c);
933 static void netlink_output_subnets(struct netlink *st, uint32_t loglevel,
934 struct subnet_list *snets)
939 for (i=0; i<snets->entries; i++) {
940 net=subnet_to_string(snets->list[i]);
941 Message(loglevel,"%s ",net);
946 static void netlink_dump_routes(struct netlink *st, bool_t requested)
952 if (requested) c=M_WARNING;
954 net=ipaddr_to_string(st->secnet_address);
955 Message(c,"%s: point-to-point (remote end is %s); routes: ",
958 netlink_output_subnets(st,c,st->clients->subnets);
961 Message(c,"%s: routing table:\n",st->name);
962 for (i=0; i<st->n_clients; i++) {
963 netlink_output_subnets(st,c,st->routes[i]->subnets);
964 Message(c,"-> tunnel %s (%s,mtu %d,%s routes,%s,"
965 "quality %d,use %d,pri %lu)\n",
967 st->routes[i]->up?"up":"down",
969 st->routes[i]->options&OPT_SOFTROUTE?"soft":"hard",
970 st->routes[i]->options&OPT_ALLOWROUTE?"free":"restricted",
971 st->routes[i]->link_quality,
972 st->routes[i]->outcount,
973 (unsigned long)st->routes[i]->priority);
975 net=ipaddr_to_string(st->secnet_address);
976 Message(c,"%s/32 -> netlink \"%s\" (use %d)\n",
977 net,st->name,st->localcount);
979 for (i=0; i<st->subnets->entries; i++) {
980 net=subnet_to_string(st->subnets->list[i]);
981 Message(c,"%s ",net);
985 Message(c,"-> host (use %d)\n",st->outcount);
989 /* ap is a pointer to a member of the routes array */
990 static int netlink_compare_client_priority(const void *ap, const void *bp)
992 const struct netlink_client *const*a=ap;
993 const struct netlink_client *const*b=bp;
995 if ((*a)->priority==(*b)->priority) return 0;
996 if ((*a)->priority<(*b)->priority) return 1;
1000 static void netlink_phase_hook(void *sst, uint32_t new_phase)
1002 struct netlink *st=sst;
1003 struct netlink_client *c;
1006 /* All the networks serviced by the various tunnels should now
1007 * have been registered. We build a routing table by sorting the
1008 * clients by priority. */
1009 st->routes=safe_malloc_ary(sizeof(*st->routes),st->n_clients,
1010 "netlink_phase_hook");
1011 /* Fill the table */
1013 for (c=st->clients; c; c=c->next) {
1017 /* Sort the table in descending order of priority */
1018 qsort(st->routes,st->n_clients,sizeof(*st->routes),
1019 netlink_compare_client_priority);
1021 netlink_dump_routes(st,False);
1024 static void netlink_signal_handler(void *sst, int signum)
1026 struct netlink *st=sst;
1027 Message(M_INFO,"%s: route dump requested by SIGUSR1\n",st->name);
1028 netlink_dump_routes(st,True);
1031 static void netlink_inst_set_mtu(void *sst, int32_t new_mtu)
1033 struct netlink_client *c=sst;
1038 static void netlink_inst_reg(void *sst, netlink_deliver_fn *deliver,
1039 void *dst, uint32_t *localmtu_r)
1041 struct netlink_client *c=sst;
1042 struct netlink *st=c->nst;
1048 *localmtu_r=st->mtu;
1051 static struct flagstr netlink_option_table[]={
1052 { "soft", OPT_SOFTROUTE },
1053 { "allow-route", OPT_ALLOWROUTE },
1056 /* This is the routine that gets called when the closure that's
1057 returned by an invocation of a netlink device closure (e.g. tun,
1058 userv-ipif) is invoked. It's used to create routes and pass in
1059 information about them; the closure it returns is used by site
1061 static closure_t *netlink_inst_create(struct netlink *st,
1062 struct cloc loc, dict_t *dict)
1064 struct netlink_client *c;
1066 struct ipset *networks;
1067 uint32_t options,priority;
1071 name=dict_read_string(dict, "name", True, st->name, loc);
1073 l=dict_lookup(dict,"routes");
1075 cfgfatal(loc,st->name,"required parameter \"routes\" not found\n");
1076 networks=string_list_to_ipset(l,loc,st->name,"routes");
1077 options=string_list_to_word(dict_lookup(dict,"options"),
1078 netlink_option_table,st->name);
1080 priority=dict_read_number(dict,"priority",False,st->name,loc,0);
1081 mtu=dict_read_number(dict,"mtu",False,st->name,loc,0);
1083 if ((options&OPT_SOFTROUTE) && !st->set_routes) {
1084 cfgfatal(loc,st->name,"this netlink device does not support "
1089 if (options&OPT_SOFTROUTE) {
1090 /* XXX for now we assume that soft routes require root privilege;
1091 this may not always be true. The device driver can tell us. */
1092 require_root_privileges=True;
1093 require_root_privileges_explanation="netlink: soft routes";
1095 cfgfatal(loc,st->name,"point-to-point netlinks do not support "
1101 /* Check that networks are a subset of st->remote_networks;
1102 refuse to register if they are not. */
1103 if (!ipset_is_subset(st->remote_networks,networks)) {
1104 cfgfatal(loc,st->name,"routes are not allowed\n");
1108 c=safe_malloc(sizeof(*c),"netlink_inst_create");
1109 c->cl.description=name;
1110 c->cl.type=CL_NETLINK;
1112 c->cl.interface=&c->ops;
1114 c->ops.reg=netlink_inst_reg;
1115 c->ops.deliver=netlink_inst_incoming;
1116 c->ops.set_quality=netlink_set_quality;
1117 c->ops.set_mtu=netlink_inst_set_mtu;
1120 c->networks=networks;
1121 c->subnets=ipset_to_subnet_list(networks);
1122 c->priority=priority;
1126 c->link_quality=LINK_QUALITY_UNUSED;
1127 c->mtu=mtu?mtu:st->mtu;
1132 c->next=st->clients;
1134 assert(st->n_clients < INT_MAX);
1140 static list_t *netlink_inst_apply(closure_t *self, struct cloc loc,
1141 dict_t *context, list_t *args)
1143 struct netlink *st=self->interface;
1149 item=list_elem(args,0);
1150 if (!item || item->type!=t_dict) {
1151 cfgfatal(loc,st->name,"must have a dictionary argument\n");
1153 dict=item->data.dict;
1155 cl=netlink_inst_create(st,loc,dict);
1157 return new_closure(cl);
1160 netlink_deliver_fn *netlink_init(struct netlink *st,
1161 void *dst, struct cloc loc,
1162 dict_t *dict, cstring_t description,
1163 netlink_route_fn *set_routes,
1164 netlink_deliver_fn *to_host)
1170 st->cl.description=description;
1171 st->cl.type=CL_PURE;
1172 st->cl.apply=netlink_inst_apply;
1173 st->cl.interface=st;
1177 st->set_routes=set_routes;
1178 st->deliver_to_host=to_host;
1180 st->name=dict_read_string(dict,"name",False,description,loc);
1181 if (!st->name) st->name=description;
1182 l=dict_lookup(dict,"networks");
1184 st->networks=string_list_to_ipset(l,loc,st->name,"networks");
1186 struct ipset *empty;
1188 st->networks=ipset_complement(empty);
1191 l=dict_lookup(dict,"remote-networks");
1193 st->remote_networks=string_list_to_ipset(l,loc,st->name,
1196 struct ipset *empty;
1198 st->remote_networks=ipset_complement(empty);
1201 st->local_address=string_item_to_ipaddr(
1202 dict_find_item(dict,"local-address", True, "netlink", loc),"netlink");
1204 sa=dict_find_item(dict,"secnet-address",False,"netlink",loc);
1205 ptpa=dict_find_item(dict,"ptp-address",False,"netlink",loc);
1207 cfgfatal(loc,st->name,"you may not specify secnet-address and "
1208 "ptp-address in the same netlink device\n");
1210 if (!(sa || ptpa)) {
1211 cfgfatal(loc,st->name,"you must specify secnet-address or "
1212 "ptp-address for this netlink device\n");
1215 st->secnet_address=string_item_to_ipaddr(sa,"netlink");
1218 st->secnet_address=string_item_to_ipaddr(ptpa,"netlink");
1221 /* To be strictly correct we could subtract secnet_address from
1222 networks here. It shouldn't make any practical difference,
1223 though, and will make the route dump look complicated... */
1224 st->subnets=ipset_to_subnet_list(st->networks);
1225 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
1226 buffer_new(&st->icmp,MAX(ICMP_BUFSIZE,st->mtu));
1230 add_hook(PHASE_SETUP,netlink_phase_hook,st);
1231 request_signal_notification(SIGUSR1, netlink_signal_handler, st);
1233 /* If we're point-to-point then we return a CL_NETLINK directly,
1234 rather than a CL_NETLINK_OLD or pure closure (depending on
1235 compatibility). This CL_NETLINK is for our one and only
1236 client. Our cl.apply function is NULL. */
1239 cl=netlink_inst_create(st,loc,dict);
1242 return netlink_dev_incoming;
1245 /* No connection to the kernel at all... */
1251 static bool_t null_set_route(void *sst, struct netlink_client *routes)
1253 struct null *st=sst;
1255 if (routes->up!=routes->kup) {
1256 Message(M_INFO,"%s: setting routes for tunnel %s to state %s\n",
1257 st->nl.name,routes->name,
1258 routes->up?"up":"down");
1259 routes->kup=routes->up;
1265 static void null_deliver(void *sst, struct buffer_if *buf)
1270 static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
1277 st=safe_malloc(sizeof(*st),"null_apply");
1279 item=list_elem(args,0);
1280 if (!item || item->type!=t_dict)
1281 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
1283 dict=item->data.dict;
1285 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_set_route,
1288 return new_closure(&st->nl.cl);
1291 void netlink_module(dict_t *dict)
1293 add_closure(dict,"null-netlink",null_apply);