1 /* User-kernel network link */
3 /* See RFCs 791, 792, 1123 and 1812 */
5 /* The netlink device is actually a router. Tunnels are unnumbered
6 point-to-point lines (RFC1812 section 2.2.7); the router has a
7 single address (the 'router-id'). */
9 /* This is where we currently have the anti-spoofing paranoia - before
10 sending a packet to the kernel we check that the tunnel it came
11 over could reasonably have produced it. */
14 /* Points to note from RFC1812 (which may require changes in this
17 3.3.4 Maximum Transmission Unit - MTU
19 The MTU of each logical interface MUST be configurable within the
20 range of legal MTUs for the interface.
22 Many Link Layer protocols define a maximum frame size that may be
23 sent. In such cases, a router MUST NOT allow an MTU to be set which
24 would allow sending of frames larger than those allowed by the Link
25 Layer protocol. However, a router SHOULD be willing to receive a
26 packet as large as the maximum frame size even if that is larger than
29 4.2.1 A router SHOULD count datagrams discarded.
31 4.2.2.1 Source route options - we probably should implement processing
32 of source routes, even though mostly the security policy will prevent
35 5.3.13.4 Source Route Options
37 A router MUST implement support for source route options in forwarded
38 packets. A router MAY implement a configuration option that, when
39 enabled, causes all source-routed packets to be discarded. However,
40 such an option MUST NOT be enabled by default.
42 5.3.13.5 Record Route Option
44 Routers MUST support the Record Route option in forwarded packets.
46 A router MAY provide a configuration option that, if enabled, will
47 cause the router to ignore (i.e., pass through unchanged) Record
48 Route options in forwarded packets. If provided, such an option MUST
49 default to enabling the record-route. This option should not affect
50 the processing of Record Route options in datagrams received by the
51 router itself (in particular, Record Route options in ICMP echo
52 requests will still be processed according to Section [4.3.3.6]).
54 5.3.13.6 Timestamp Option
56 Routers MUST support the timestamp option in forwarded packets. A
57 timestamp value MUST follow the rules given [INTRO:2].
59 If the flags field = 3 (timestamp and prespecified address), the
60 router MUST add its timestamp if the next prespecified address
61 matches any of the router's IP addresses. It is not necessary that
62 the prespecified address be either the address of the interface on
63 which the packet arrived or the address of the interface over which
67 4.2.2.7 Fragmentation: RFC 791 Section 3.2
69 Fragmentation, as described in [INTERNET:1], MUST be supported by a
72 4.2.2.8 Reassembly: RFC 791 Section 3.2
74 As specified in the corresponding section of [INTRO:2], a router MUST
75 support reassembly of datagrams that it delivers to itself.
77 4.2.2.9 Time to Live: RFC 791 Section 3.2
79 Note in particular that a router MUST NOT check the TTL of a packet
80 except when forwarding it.
82 A router MUST NOT discard a datagram just because it was received
83 with TTL equal to zero or one; if it is to the router and otherwise
84 valid, the router MUST attempt to receive it.
86 On messages the router originates, the IP layer MUST provide a means
87 for the transport layer to set the TTL field of every datagram that
88 is sent. When a fixed TTL value is used, it MUST be configurable.
91 8.1 The Simple Network Management Protocol - SNMP
92 8.1.1 SNMP Protocol Elements
94 Routers MUST be manageable by SNMP [MGT:3]. The SNMP MUST operate
95 using UDP/IP as its transport and network protocols.
/* MDEBUG(...): debug logging helper.  The definition before the #else
   forwards its arguments to Message() at level M_DEBUG; the
   !NETLINK_DEBUG definition expands to a no-op expression. */
110 #define MDEBUG(...) Message(M_DEBUG, __VA_ARGS__)
111 #else /* !NETLINK_DEBUG */
112 #define MDEBUG(...) ((void)0)
113 #endif /* !NETLINK_DEBUG */
/* ICMP type numbers, each followed by the code numbers that this file
   uses together with that type. */
115 #define ICMP_TYPE_ECHO_REPLY 0
117 #define ICMP_TYPE_UNREACHABLE 3
118 #define ICMP_CODE_NET_UNREACHABLE 0
119 #define ICMP_CODE_PROTOCOL_UNREACHABLE 2
120 #define ICMP_CODE_FRAGMENTATION_REQUIRED 4
121 #define ICMP_CODE_NET_PROHIBITED 13
123 #define ICMP_TYPE_ECHO_REQUEST 8
125 #define ICMP_TYPE_TIME_EXCEEDED 11
126 #define ICMP_CODE_TTL_EXCEEDED 0
/* Generic IP checksum routine (the Internet checksum of RFC 1071).
 *
 * iph:   start of the data to checksum (any alignment).
 * count: number of bytes to cover; values <= 0 checksum nothing.
 *
 * Returns the one's complement of the one's-complement sum of the data
 * taken as big-endian 16-bit words, in network byte order, so the
 * result can be stored straight into a header's checksum field.
 * Checksumming data that already contains a correct checksum yields 0.
 *
 * The data is read a byte at a time, so no unaligned or type-punned
 * uint16_t loads are performed.  A trailing odd byte is treated as the
 * high-order byte of a final word padded with zero, as RFC 1071
 * requires; adding it unshifted gives wrong checksums for odd lengths.
 *
 * The 32-bit accumulator is sufficient for anything up to the maximum
 * IP datagram size. */
static inline uint16_t ip_csum(const uint8_t *iph,int32_t count)
{
    uint32_t sum=0;
    uint16_t folded;
    uint8_t wire[2];
    uint16_t result;

    while (count>1) {
	sum+=((uint32_t)iph[0]<<8)|(uint32_t)iph[1];
	iph+=2;
	count-=2;
    }
    if (count>0) {
	/* odd length: last byte is the high half of a zero-padded word */
	sum+=(uint32_t)iph[0]<<8;
    }
    /* fold the carries back in (end-around carry) */
    while (sum>>16) {
	sum=(sum&0xffff)+(sum>>16);
    }
    folded=(uint16_t)(~sum&0xffff);

    /* emit in network byte order regardless of host endianness */
    wire[0]=(uint8_t)(folded>>8);
    wire[1]=(uint8_t)(folded&0xff);
    memcpy(&result,wire,sizeof(result));
    return result;
}
147 * This is a version of ip_compute_csum() optimized for IP headers,
148 * which always checksum on 4 octet boundaries.
150 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
/* Inline-assembly implementation of the IP header checksum.
   iph is the start of the header and ihl its length as carried in the
   header's ihl field (the C implementation of this function multiplies
   ihl by 4 to obtain a byte count).
   NOTE(review): the adcl mnemonics suggest this variant is x86-only;
   the preprocessor condition that selects it is not visible here. */
153 static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl) {
156 __asm__ __volatile__(
162 "adcl 12(%1), %0 ;\n"
163 "1: adcl 16(%1), %0 ;\n"
174 /* Since the input registers which are loaded with iph and ihl
175 are modified, we must also specify them as outputs, or gcc
176 will assume they contain their original values. */
177 : "=r" (sum), "=r" (iph), "=r" (ihl)
178 : "1" (iph), "2" (ihl)
/* C implementation of ip_fast_csum(): checksums ihl*4 bytes starting at
   iph by calling ip_csum().  The assert rejects values of ihl for which
   ihl*4 would not fit in an int. */
183 static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl)
185 assert(ihl < INT_MAX/4);
186 return ip_csum(iph,ihl*4);
191 #if defined (WORDS_BIGENDIAN)
/* Masks for the header's frag field once converted with ntohs():
   the fragment offset (counted in units of 8 octets by the code that
   uses it), the "more fragments" flag and the "don't fragment" flag. */
202 #define IPHDR_FRAG_OFF ((uint16_t)0x1fff)
203 #define IPHDR_FRAG_MORE ((uint16_t)0x2000)
204 #define IPHDR_FRAG_DONT ((uint16_t)0x4000)
205 /* reserved 0x8000 */
211 /* The options start here. */
219 union icmpinfofield {
/* Info field passed to netlink_icmp_simple() for ICMP messages that
   carry no extra information.  It has static storage duration and no
   initialiser, so it is all zeroes. */
238 static const union icmpinfofield icmp_noinfo;
/* Forward declarations: both functions are defined further down but
   are already needed by netlink_icmp_simple(). */
240 static void netlink_client_deliver(struct netlink *st,
241 struct netlink_client *client,
242 uint32_t source, uint32_t dest,
243 struct buffer_if *buf);
244 static void netlink_host_deliver(struct netlink *st,
245 struct netlink_client *sender,
246 uint32_t source, uint32_t dest,
247 struct buffer_if *buf);
/* Name to print for the sender of a packet: the client's name, or
   "(local)" when sender is NULL. */
249 static const char *sender_name(struct netlink_client *sender /* or NULL */)
251 return sender?sender->name:"(local)";
/* Forward declaration: defined further down, already needed by
   netlink_icmp_simple(). */
254 static void netlink_packet_deliver(struct netlink *st,
255 struct netlink_client *client,
256 struct buffer_if *buf);
258 /* XXX RFC1812 4.3.2.5:
259 All other ICMP error messages (Destination Unreachable,
260 Redirect, Time Exceeded, and Parameter Problem) SHOULD have their
261 precedence value set to 6 (INTERNETWORK CONTROL) or 7 (NETWORK
262 CONTROL). The IP Precedence value for these error messages MAY be
/* Start a new ICMP message in st->icmp.
   Allocates and initialises st->icmp, appends one struct icmphdr and
   fills in its IP header: total length is len plus the IP header
   length plus 8, TTL is fixed at 255, source and dest are given in
   host byte order and stored in network byte order, and the header
   checksum is computed last.  netlink_icmp_simple() uses the returned
   header pointer to set the ICMP type and code and then appends len
   bytes of payload. */
265 static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
266 uint32_t source, uint32_t dest,
271 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
272 buffer_init(&st->icmp,calculate_max_start_pad());
273 h=buf_append(&st->icmp,sizeof(*h));
278 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
281 h->iph.ttl=255; /* XXX should be configurable */
283 h->iph.saddr=htonl(source);
284 h->iph.daddr=htonl(dest);
286 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
/* Fill in the ICMP checksum field correctly.
   The checksum covers everything after the IP header: the length is
   the packet's tot_len minus 4*ihl, starting at h->type, and the
   result is stored in h->check. */
294 static void netlink_icmp_csum(struct icmphdr *h)
298 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
300 h->check=ip_csum(&h->type,len);
304 * An ICMP error message MUST NOT be sent as the result of
307 * * an ICMP error message, or
309 * * a datagram destined to an IP broadcast or IP multicast
312 * * a datagram sent as a link-layer broadcast, or
314 * * a non-initial fragment, or
316 * * a datagram whose source address does not define a single
317 * host -- e.g., a zero address, a loopback address, a
318 * broadcast address, a multicast address, or a Class E
/* Decide whether an ICMP error may be generated in response to the
   packet in buf.
   Returns False when buf is shorter than a struct icmphdr, when the
   packet is a non-initial fragment (non-zero fragment offset), or when
   its source address is 0, in 127/8 (loopback), multicast or Class E.
   For ICMP packets (protocol 1) the ICMP type is also examined; not
   all of that switch is visible in this view. */
321 static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
324 struct icmphdr *icmph;
327 if (buf->size < (int)sizeof(struct icmphdr)) return False;
328 iph=(struct iphdr *)buf->start;
329 icmph=(struct icmphdr *)buf->start;
330 if (iph->protocol==1) {
331 switch(icmph->type) {
332 /* Based on http://www.iana.org/assignments/icmp-parameters/icmp-parameters.xhtml#icmp-parameters-types
333 * as retrieved Thu, 20 Mar 2014 00:16:44 +0000.
334 * Deprecated, reserved, unassigned and experimental
335 * options are treated as not safe to reply to.
337 case 0: /* Echo Reply */
339 case 13: /* Timestamp */
340 case 14: /* Timestamp Reply */
346 /* How do we spot broadcast destination addresses? */
347 if (ntohs(iph->frag)&IPHDR_FRAG_OFF) return False;
348 source=ntohl(iph->saddr);
349 if (source==0) return False;
350 if ((source&0xff000000)==0x7f000000) return False;
351 /* How do we spot broadcast source addresses? */
352 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
353 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
357 /* How much of the original IP packet do we include in its ICMP
358 response? The header plus up to 64 bits. */
361 4.3.2.3 Original Message Header
363 Historically, every ICMP error message has included the Internet
364 header and at least the first 8 data bytes of the datagram that
365 triggered the error. This is no longer adequate, due to the use of
366 IP-in-IP tunneling and other technologies. Therefore, the ICMP
367 datagram SHOULD contain as much of the original datagram as possible
368 without the length of the ICMP datagram exceeding 576 bytes. The
369 returned IP header (and user data) MUST be identical to that which
370 was received, except that the router is not required to undo any
371 modifications to the IP header that are normally performed in
372 forwarding that were performed before the error was detected (e.g.,
373 decrementing the TTL, or updating options). Note that the
374 requirements of Section [4.3.3.5] supersede this requirement in some
375 cases (i.e., for a Parameter Problem message, if the problem is in a
376 modified field, the router must undo the modification). See Section
/* Number of bytes of the offending packet to quote in an ICMP error.
   Returns 0 when buf is too short to hold an IP header; otherwise the
   smaller of hlen and the packet's own tot_len, so that no more than
   the packet actually contains is quoted. */
379 static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
381 if (buf->size < (int)sizeof(struct iphdr)) return 0;
382 struct iphdr *iph=(struct iphdr *)buf->start;
386 /* We include the first 8 bytes of the packet data, provided they exist */
388 plen=ntohs(iph->tot_len);
389 return (hlen>plen?plen:hlen);
/* Generate an ICMP message of the given type and code about the packet
   in buf, if netlink_icmp_may_reply() permits one.
   origsender indicates where the packet we're constructing a response
   to comes from.  NULL indicates the host.
   The message is addressed to the source address of the offending
   packet; its own source is st->secnet_address or st->local_address
   depending on the branch taken below.  The message is built in
   st->icmp (header from netlink_icmp_tmpl(), then the leading bytes of
   the offending packet as chosen by netlink_icmp_reply_len()), is
   checksummed, and is then handed to netlink_packet_deliver(),
   netlink_client_deliver() or netlink_host_deliver(). */
394 static void netlink_icmp_simple(struct netlink *st,
395 struct netlink_client *origsender,
396 struct buffer_if *buf,
397 uint8_t type, uint8_t code,
398 union icmpinfofield info)
403 if (netlink_icmp_may_reply(buf)) {
404 struct iphdr *iph=(struct iphdr *)buf->start;
406 uint32_t icmpdest = ntohl(iph->saddr);
408 const char *icmpsourcedebugprefix;
410 icmpsource=st->secnet_address;
411 icmpsourcedebugprefix="";
412 } else if (origsender) {
413 /* was from peer, send reply as if from host */
414 icmpsource=st->local_address;
415 icmpsourcedebugprefix="L!";
417 /* was from host, send reply as if from peer */
418 icmpsource=st->secnet_address; /* actually, peer address */
419 icmpsourcedebugprefix="P!";
421 MDEBUG("%s: generating ICMP re %s[%s]->[%s]:"
422 " from %s%s type=%u code=%u\n",
423 st->name, sender_name(origsender),
424 ipaddr_to_string(ntohl(iph->saddr)),
425 ipaddr_to_string(ntohl(iph->daddr)),
426 icmpsourcedebugprefix,
427 ipaddr_to_string(icmpsource),
430 len=netlink_icmp_reply_len(buf);
431 h=netlink_icmp_tmpl(st,icmpsource,icmpdest,len);
432 h->type=type; h->code=code; h->d=info;
433 memcpy(buf_append(&st->icmp,len),buf->start,len);
434 netlink_icmp_csum(h);
437 netlink_packet_deliver(st,NULL,&st->icmp);
438 } else if (origsender) {
439 netlink_client_deliver(st,origsender,icmpsource,icmpdest,&st->icmp);
441 netlink_host_deliver(st,NULL,icmpsource,icmpdest,&st->icmp);
443 BUF_ASSERT_FREE(&st->icmp);
448 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
450 * RFC1812: 4.2.2.5 MUST discard messages containing invalid checksums.
452 * Is the datagram acceptable?
454 * 1. Length at least the size of an ip header
456 * 3. Checksums correctly.
457 * 4. Doesn't have a bogus length
/* Sanity-check the IP packet in buf.  When a check fails, BAD() formats
   a short description into errmsgbuf (at most errmsgbuflen bytes);
   netlink_incoming() logs that text and treats the packet as bad.
   Checks, in order: buffer holds at least a struct iphdr; ihl >= 5;
   version == 4; buffer holds the whole header (ihl*4 bytes); header
   checksum verifies (ip_fast_csum() over the header is 0); tot_len
   equals the buffer size exactly; tot_len is not smaller than the
   header length.
   NOTE(review): the first BAD() call formats buf->size with PRIu32
   while the later calls use PRId32 for the same field -- confirm which
   one matches the declared type of buf->size. */
459 static bool_t netlink_check(struct netlink *st, struct buffer_if *buf,
460 char *errmsgbuf, int errmsgbuflen)
462 #define BAD(...) do{ \
463 snprintf(errmsgbuf,errmsgbuflen,__VA_ARGS__); \
467 if (buf->size < (int)sizeof(struct iphdr)) BAD("len %"PRIu32"",buf->size);
468 struct iphdr *iph=(struct iphdr *)buf->start;
471 if (iph->ihl < 5) BAD("ihl %u",iph->ihl);
472 if (iph->version != 4) BAD("version %u",iph->version);
473 if (buf->size < iph->ihl*4) BAD("size %"PRId32"<%u*4",buf->size,iph->ihl);
474 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) BAD("csum");
475 len=ntohs(iph->tot_len);
476 /* There should be no padding */
477 if (buf->size!=len) BAD("len %"PRId32"!=%"PRId32,buf->size,len);
478 if (len<(iph->ihl<<2)) BAD("len %"PRId32"<(%u<<2)",len,iph->ihl);
479 /* XXX check that there's no source route specified */
/* Rewrite, in place, the options area of the IP header at base so that
   it is suitable for non-initial fragments: options whose type has the
   0x80 ("copy") bit set are moved down to follow the fixed header,
   the result is padded with End-of-Options bytes to a multiple of 4
   octets, and the header's ihl field is updated to the new length.
   Returns a static error string when the option list is truncated;
   the caller tests the result for non-NULL to detect failure.
   NOTE(review): hlp is presumably the in/out header length in bytes;
   the lines that read and write *hlp are not visible here. */
485 static const char *fragment_filter_header(uint8_t *base, long *hlp)
487 const int fixedhl = sizeof(struct iphdr);
489 const uint8_t *ipend = base + hl;
490 uint8_t *op = base + fixedhl;
491 const uint8_t *ip = op;
495 int remain = ipend - ip;
496 if (opt == 0x00) /* End of Options List */ break;
497 if (opt == 0x01) /* No Operation */ continue;
498 if (remain < 2) return "IPv4 options truncated at length";
500 if (remain < optlen) return "IPv4 options truncated in option";
501 if (opt & 0x80) /* copy */ {
502 memmove(op, ip, optlen);
507 while ((hl = (op - base)) & 0x3)
508 *op++ = 0x00 /* End of Option List */;
509 ((struct iphdr*)base)->ihl = hl >> 2;
/* Fragment or send ICMP Fragmentation Needed.
   Hands the packet in buf to deliver(deliver_dst, ...), fragmenting it
   first when it is larger than mtu:
   - a packet that fits (buf->size <= mtu) is delivered unchanged;
   - a packet with the Don't Fragment flag set is not fragmented;
     instead an ICMP "fragmentation required" message carrying mtu is
     generated via netlink_icmp_simple();
   - otherwise the original is copied into st->icmp and fragments are
     built one at a time in buf: header, then as much payload as fits
     (rounded down to a multiple of 8 octets except for the final
     piece), then tot_len, the frag field and the header checksum are
     rewritten before each fragment is delivered.  After the first
     fragment the stashed header is passed through
     fragment_filter_header() for the remaining fragments.
   BADFRAG() reports the failures it is used for, naming st->name, the
   source address and delivery_name. */
516 static void netlink_maybe_fragment(struct netlink *st,
517 struct netlink_client *sender,
518 netlink_deliver_fn *deliver,
520 const char *delivery_name,
522 uint32_t source, uint32_t dest,
523 struct buffer_if *buf)
525 struct iphdr *iph=(struct iphdr*)buf->start;
526 long hl = iph->ihl*4;
527 const char *ssource = ipaddr_to_string(source);
529 if (buf->size <= mtu) {
530 deliver(deliver_dst, buf);
534 MDEBUG("%s: fragmenting %s->%s org.size=%"PRId32"\n",
535 st->name, ssource, delivery_name, buf->size);
537 #define BADFRAG(m, ...) \
539 "%s: fragmenting packet from source %s" \
540 " for transmission via %s: " m "\n", \
541 st->name, ssource, delivery_name, \
544 unsigned orig_frag = ntohs(iph->frag);
546 if (orig_frag&IPHDR_FRAG_DONT) {
547 union icmpinfofield info =
548 { .fragneeded = { .unused = 0, .mtu = htons(mtu) } };
549 netlink_icmp_simple(st,sender,buf,
550 ICMP_TYPE_UNREACHABLE,
551 ICMP_CODE_FRAGMENTATION_REQUIRED,
557 BADFRAG("mtu %"PRId32" too small", mtu);
562 /* we (ab)use the icmp buffer to stash the original packet */
563 struct buffer_if *orig = &st->icmp;
564 BUF_ALLOC(orig,"netlink_client_deliver fragment orig");
565 buffer_copy(orig,buf);
568 const uint8_t *startindata = orig->start + hl;
569 const uint8_t *indata = startindata;
570 const uint8_t *endindata = orig->start + orig->size;
574 /* compute our fragment offset */
575 long dataoffset = indata - startindata
576 + (orig_frag & IPHDR_FRAG_OFF)*8;
577 assert(!(dataoffset & 7));
578 if (dataoffset > IPHDR_FRAG_OFF*8) {
579 BADFRAG("ultimate fragment offset out of range");
583 BUF_ALLOC(buf,"netlink_client_deliver fragment frag");
584 buffer_init(buf,calculate_max_start_pad());
586 /* copy header (possibly filtered); will adjust in a bit */
587 struct iphdr *fragh = buf_append(buf, hl);
588 memcpy(fragh, orig->start, hl);
590 /* decide how much payload to copy and copy it */
591 long avail = mtu - hl;
592 long remain = endindata - indata;
593 long use = avail < remain ? (avail & ~(long)7) : remain;
594 memcpy(buf_append(buf, use), indata, use);
597 _Bool last_frag = indata >= endindata;
599 /* adjust the header */
600 fragh->tot_len = htons(buf->size);
602 htons((orig_frag & ~IPHDR_FRAG_OFF) |
603 (last_frag ? 0 : IPHDR_FRAG_MORE) |
606 fragh->check = ip_fast_csum((const void*)fragh, fragh->ihl);
608 /* actually send it */
609 deliver(deliver_dst, buf);
613 /* after copying the header for the first frag,
614 * we filter the header for the remaining frags */
616 const char *bad = fragment_filter_header(orig->start, &hl);
617 if (bad) { BADFRAG("%s", bad); break; }
/* Deliver a packet _to_ client; used after we have decided
 * what to do with it (and just to check that the client has
 * actually registered a delivery function with us).
 * If client->deliver is unset the packet is dropped and an M_ERR
 * message naming source and dest is logged.  Otherwise the packet goes
 * through netlink_maybe_fragment() using the client's delivery
 * function, destination, name and MTU; the sender argument passed
 * there is NULL. */
629 static void netlink_client_deliver(struct netlink *st,
630 struct netlink_client *client,
631 uint32_t source, uint32_t dest,
632 struct buffer_if *buf)
634 if (!client->deliver) {
636 s=ipaddr_to_string(source);
637 d=ipaddr_to_string(dest);
638 Message(M_ERR,"%s: dropping %s->%s, client not registered\n",
644 netlink_maybe_fragment(st,NULL, client->deliver,client->dst,client->name,
645 client->mtu, source,dest,buf);
/* Deliver a packet to the host; used after we have decided that that
 * is what to do with it.
 * The packet goes through netlink_maybe_fragment() using
 * st->deliver_to_host, st->dst, the name "(host)" and st->mtu; sender
 * is passed on unchanged. */
651 static void netlink_host_deliver(struct netlink *st,
652 struct netlink_client *sender,
653 uint32_t source, uint32_t dest,
654 struct buffer_if *buf)
656 netlink_maybe_fragment(st,sender, st->deliver_to_host,st->dst,"(host)",
657 st->mtu, source,dest,buf);
/* Deliver a packet. "sender"==NULL for packets from the host and packets
   generated internally in secnet.
   Outline of what the visible code does:
   - packets shorter than an IP header, and packets addressed to
     st->secnet_address, are reported with an M_ERR message;
   - routing is unrestricted for packets from the host and from senders
     with OPT_ALLOWROUTE;
   - st->routes is searched for routes that are up and whose networks
     contain the destination, preferring higher link_quality;
   - with no matching route the packet goes to the host if st->networks
     contains the destination, otherwise ICMP net-unreachable is sent;
   - a matching route that may not be used yields ICMP net-prohibited;
   - a usable route with quality > 0 gets the packet via
     netlink_client_deliver(), otherwise ICMP net-unreachable is sent. */
663 static void netlink_packet_deliver(struct netlink *st,
664 struct netlink_client *sender,
665 struct buffer_if *buf)
667 if (buf->size < (int)sizeof(struct iphdr)) {
668 Message(M_ERR,"%s: trying to deliver a too-short packet"
669 " from %s!\n",st->name, sender_name(sender));
674 struct iphdr *iph=(struct iphdr *)buf->start;
675 uint32_t dest=ntohl(iph->daddr);
676 uint32_t source=ntohl(iph->saddr);
677 uint32_t best_quality;
678 bool_t allow_route=False;
679 bool_t found_allowed=False;
683 BUF_ASSERT_USED(buf);
685 if (dest==st->secnet_address) {
686 Message(M_ERR,"%s: trying to deliver a packet to myself!\n",st->name);
691 /* Packets from the host (sender==NULL) may always be routed. Packets
692 from clients with the allow_route option will also be routed. */
693 if (!sender || (sender && (sender->options & OPT_ALLOWROUTE)))
696 /* If !allow_route, we check the routing table anyway, and if
697 there's a suitable route with OPT_ALLOWROUTE set we use it. If
698 there's a suitable route, but none with OPT_ALLOWROUTE set then
699 we generate ICMP 'communication with destination network
700 administratively prohibited'. */
704 for (i=0; i<st->n_clients; i++) {
705 if (st->routes[i]->up &&
706 ipset_contains_addr(st->routes[i]->networks,dest)) {
707 /* It's an available route to the correct destination. But is
708 it better than the one we already have? */
710 /* If we have already found an allowed route then we don't
711 bother looking at routes we're not allowed to use. If
712 we don't yet have an allowed route we'll consider any. */
713 if (!allow_route && found_allowed) {
714 if (!(st->routes[i]->options&OPT_ALLOWROUTE)) continue;
717 if (st->routes[i]->link_quality>best_quality
718 || best_quality==0) {
719 best_quality=st->routes[i]->link_quality;
721 if (st->routes[i]->options&OPT_ALLOWROUTE)
723 /* If quality isn't perfect we may wish to
724 consider kicking the tunnel with a 0-length
725 packet to prompt it to perform a key setup.
726 Then it'll eventually decide it's up or
728 /* If quality is perfect and we're allowed to use the
729 route we don't need to search any more. */
730 if (best_quality>=MAXIMUM_LINK_QUALITY &&
731 (allow_route || found_allowed)) break;
735 if (best_match==-1) {
736 /* The packet's not going down a tunnel. It might (ought to)
738 if (ipset_contains_addr(st->networks,dest)) {
739 netlink_host_deliver(st,sender,source,dest,buf);
740 BUF_ASSERT_FREE(buf);
743 s=ipaddr_to_string(source);
744 d=ipaddr_to_string(dest);
745 Message(M_DEBUG,"%s: don't know where to deliver packet "
746 "(s=%s, d=%s)\n", st->name, s, d);
748 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE,
749 ICMP_CODE_NET_UNREACHABLE, icmp_noinfo);
754 !(st->routes[best_match]->options&OPT_ALLOWROUTE)) {
756 s=ipaddr_to_string(source);
757 d=ipaddr_to_string(dest);
758 /* We have a usable route but aren't allowed to use it.
759 Generate ICMP destination unreachable: communication
760 with destination network administratively prohibited */
761 Message(M_NOTICE,"%s: denied forwarding for packet (s=%s, d=%s)\n",
765 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE,
766 ICMP_CODE_NET_PROHIBITED, icmp_noinfo);
769 if (best_quality>0) {
770 netlink_client_deliver(st,st->routes[best_match],
772 BUF_ASSERT_FREE(buf);
774 /* Generate ICMP destination unreachable */
775 netlink_icmp_simple(st,sender,buf,
776 ICMP_TYPE_UNREACHABLE,
777 ICMP_CODE_NET_UNREACHABLE,
783 BUF_ASSERT_FREE(buf);
/* Forward a packet that is not addressed to secnet itself.
   Packets shorter than an IP header are silently ignored.  One path
   generates ICMP time-exceeded (TTL exceeded) instead of forwarding;
   otherwise the IP header checksum is recomputed and the packet is
   passed to netlink_packet_deliver().
   NOTE(review): the TTL test and decrement are not visible in this
   view. */
786 static void netlink_packet_forward(struct netlink *st,
787 struct netlink_client *sender,
788 struct buffer_if *buf)
790 if (buf->size < (int)sizeof(struct iphdr)) return;
791 struct iphdr *iph=(struct iphdr *)buf->start;
793 BUF_ASSERT_USED(buf);
795 /* Packet has already been checked */
797 /* Generate ICMP time exceeded */
798 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_TIME_EXCEEDED,
799 ICMP_CODE_TTL_EXCEEDED,icmp_noinfo);
805 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
807 netlink_packet_deliver(st,sender,buf);
808 BUF_ASSERT_FREE(buf);
/* Deal with packets addressed explicitly to us.
   Packets shorter than a struct icmphdr are ignored with a warning.
   Fragmented packets (non-zero offset or More Fragments set) are
   ignored too; the warning is logged only for the first fragment.
   An ICMP echo request (protocol 1, type 8, code 0) is answered by
   rewriting the same buffer into an echo reply: type changed,
   destination set to the old source, source set to
   st->secnet_address, both checksums recomputed, then delivered with
   netlink_packet_deliver().  Other ICMP is logged as unknown; the
   remaining case produces ICMP protocol-unreachable. */
812 static void netlink_packet_local(struct netlink *st,
813 struct netlink_client *sender,
814 struct buffer_if *buf)
820 if (buf->size < (int)sizeof(struct icmphdr)) {
821 Message(M_WARNING,"%s: short packet addressed to secnet; "
822 "ignoring it\n",st->name);
826 h=(struct icmphdr *)buf->start;
828 unsigned fraginfo = ntohs(h->iph.frag);
829 if ((fraginfo&(IPHDR_FRAG_OFF|IPHDR_FRAG_MORE))!=0) {
830 if (!(fraginfo & IPHDR_FRAG_OFF))
831 /* report only for first fragment */
832 Message(M_WARNING,"%s: fragmented packet addressed to secnet; "
833 "ignoring it\n",st->name);
838 if (h->iph.protocol==1) {
840 if (h->type==ICMP_TYPE_ECHO_REQUEST && h->code==0) {
841 /* ICMP echo-request. Special case: we re-use the buffer
842 to construct the reply. */
843 h->type=ICMP_TYPE_ECHO_REPLY;
844 h->iph.daddr=h->iph.saddr;
845 h->iph.saddr=htonl(st->secnet_address);
848 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
849 netlink_icmp_csum(h);
850 netlink_packet_deliver(st,NULL,buf);
853 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
855 /* Send ICMP protocol unreachable */
856 netlink_icmp_simple(st,sender,buf,ICMP_TYPE_UNREACHABLE,
857 ICMP_CODE_PROTOCOL_UNREACHABLE,icmp_noinfo);
865 /* If cid==NULL packet is from host, otherwise cid specifies which tunnel
/* Entry point for every packet arriving at the netlink, from a tunnel
   (sender != NULL) or from the host (sender == NULL).
   The packet is validated with netlink_check() and then its source
   address is checked: packets from a tunnel must have a source inside
   sender->networks, packets from the host a source inside
   st->networks; offenders are logged with a warning.  One path hands
   the packet straight to netlink_host_deliver() or
   netlink_client_deliver() without looking at the destination (see the
   point-to-point comment below).  Otherwise packets addressed to
   st->secnet_address go to netlink_packet_local() and all others to
   netlink_packet_forward(). */
867 static void netlink_incoming(struct netlink *st, struct netlink_client *sender,
868 struct buffer_if *buf)
870 uint32_t source,dest;
873 const char *sourcedesc=sender?sender->name:"host";
875 BUF_ASSERT_USED(buf);
877 if (!netlink_check(st,buf,errmsgbuf,sizeof(errmsgbuf))) {
878 Message(M_WARNING,"%s: bad IP packet from %s: %s\n",
884 assert(buf->size >= (int)sizeof(struct iphdr));
885 iph=(struct iphdr *)buf->start;
887 source=ntohl(iph->saddr);
888 dest=ntohl(iph->daddr);
890 /* Check source. If we don't like the source, there's no point
891 generating ICMP because we won't know how to get it to the
892 source of the packet. */
894 /* Check that the packet source is appropriate for the tunnel
896 if (!ipset_contains_addr(sender->networks,source)) {
898 s=ipaddr_to_string(source);
899 d=ipaddr_to_string(dest);
900 Message(M_WARNING,"%s: packet from tunnel %s with bad "
901 "source address (s=%s,d=%s)\n",st->name,sender->name,s,d);
907 /* Check that the packet originates in our configured local
908 network, and hasn't been forwarded from elsewhere or
909 generated with the wrong source address */
910 if (!ipset_contains_addr(st->networks,source)) {
912 s=ipaddr_to_string(source);
913 d=ipaddr_to_string(dest);
914 Message(M_WARNING,"%s: outgoing packet with bad source address "
915 "(s=%s,d=%s)\n",st->name,s,d);
922 /* If this is a point-to-point device we don't examine the
923 destination address at all; we blindly send it down our
924 one-and-only registered tunnel, or to the host, depending on
925 where it came from. It's up to external software to check
926 address validity and generate ICMP, etc. */
929 netlink_host_deliver(st,sender,source,dest,buf);
931 netlink_client_deliver(st,st->clients,source,dest,buf);
933 BUF_ASSERT_FREE(buf);
937 /* st->secnet_address needs checking before matching destination
939 if (dest==st->secnet_address) {
940 netlink_packet_local(st,sender,buf);
941 BUF_ASSERT_FREE(buf);
944 netlink_packet_forward(st,sender,buf);
945 BUF_ASSERT_FREE(buf);
/* Delivery callback installed as a client's ops.deliver: sst is the
   struct netlink_client, and the packet is passed to
   netlink_incoming() with that client as the sender. */
948 static void netlink_inst_incoming(void *sst, struct buffer_if *buf)
950 struct netlink_client *c=sst;
951 struct netlink *st=c->nst;
953 netlink_incoming(st,c,buf);
/* Delivery callback returned by netlink_init(): sst is the struct
   netlink, and the packet is passed to netlink_incoming() with a NULL
   sender, i.e. as coming from the host. */
956 static void netlink_dev_incoming(void *sst, struct buffer_if *buf)
958 struct netlink *st=sst;
960 netlink_incoming(st,NULL,buf);
/* Record a new link quality for the client sst.  The client counts as
   up unless the quality equals LINK_QUALITY_DOWN.  For clients with
   OPT_SOFTROUTE the device's set_routes callback is invoked so that it
   can react to the change. */
963 static void netlink_set_quality(void *sst, uint32_t quality)
965 struct netlink_client *c=sst;
966 struct netlink *st=c->nst;
968 c->link_quality=quality;
969 c->up=(c->link_quality==LINK_QUALITY_DOWN)?False:True;
970 if (c->options&OPT_SOFTROUTE) {
971 st->set_routes(st->dst,c);
/* Log every subnet in snets at the given loglevel, each followed by a
   space and with no newline between them. */
975 static void netlink_output_subnets(struct netlink *st, uint32_t loglevel,
976 struct subnet_list *snets)
981 for (i=0; i<snets->entries; i++) {
982 net=subnet_to_string(snets->list[i]);
983 Message(loglevel,"%s ",net);
/* Log the routing state of netlink st.  When requested is true the
   message class is M_WARNING.  One form prints a single point-to-point
   line with the remote address and the client's subnets; the other
   prints the routing table: one entry per client in st->routes (its
   subnets, name, up/down state, soft/hard and free/restricted options,
   quality, use count and priority), then the secnet address itself,
   then the subnets that are routed to the host. */
988 static void netlink_dump_routes(struct netlink *st, bool_t requested)
994 if (requested) c=M_WARNING;
996 net=ipaddr_to_string(st->secnet_address);
997 Message(c,"%s: point-to-point (remote end is %s); routes: ",
1000 netlink_output_subnets(st,c,st->clients->subnets);
1003 Message(c,"%s: routing table:\n",st->name);
1004 for (i=0; i<st->n_clients; i++) {
1005 netlink_output_subnets(st,c,st->routes[i]->subnets);
1006 Message(c,"-> tunnel %s (%s,mtu %d,%s routes,%s,"
1007 "quality %d,use %d,pri %lu)\n",
1008 st->routes[i]->name,
1009 st->routes[i]->up?"up":"down",
1011 st->routes[i]->options&OPT_SOFTROUTE?"soft":"hard",
1012 st->routes[i]->options&OPT_ALLOWROUTE?"free":"restricted",
1013 st->routes[i]->link_quality,
1014 st->routes[i]->outcount,
1015 (unsigned long)st->routes[i]->priority);
1017 net=ipaddr_to_string(st->secnet_address);
1018 Message(c,"%s/32 -> netlink \"%s\" (use %d)\n",
1019 net,st->name,st->localcount);
1021 for (i=0; i<st->subnets->entries; i++) {
1022 net=subnet_to_string(st->subnets->list[i]);
1023 Message(c,"%s ",net);
1027 Message(c,"-> host (use %d)\n",st->outcount);
/* qsort() comparison function for the routes array.
   ap and bp are pointers to members of the routes array, i.e. pointers
   to client pointers.  Equal priorities compare equal, and a client
   with a lower priority compares greater, which is what makes
   netlink_phase_hook() sort the table in descending priority order. */
1032 static int netlink_compare_client_priority(const void *ap, const void *bp)
1034 const struct netlink_client *const*a=ap;
1035 const struct netlink_client *const*b=bp;
1037 if ((*a)->priority==(*b)->priority) return 0;
1038 if ((*a)->priority<(*b)->priority) return 1;
/* Phase hook registered by netlink_init() for PHASE_SETUP.  Builds the
   routing table: allocates st->routes with one slot per client, fills
   it by walking the st->clients list, sorts it with
   netlink_compare_client_priority() and finally dumps the resulting
   routes (not as an explicitly requested dump). */
1042 static void netlink_phase_hook(void *sst, uint32_t new_phase)
1044 struct netlink *st=sst;
1045 struct netlink_client *c;
1048 /* All the networks serviced by the various tunnels should now
1049 * have been registered. We build a routing table by sorting the
1050 * clients by priority. */
1051 st->routes=safe_malloc_ary(sizeof(*st->routes),st->n_clients,
1052 "netlink_phase_hook");
1053 /* Fill the table */
1055 for (c=st->clients; c; c=c->next) {
1059 /* Sort the table in descending order of priority */
1060 qsort(st->routes,st->n_clients,sizeof(*st->routes),
1061 netlink_compare_client_priority);
1063 netlink_dump_routes(st,False);
/* Signal callback registered by netlink_init() for SIGUSR1: logs that
   a dump was requested and then dumps the routes as a requested dump. */
1066 static void netlink_signal_handler(void *sst, int signum)
1068 struct netlink *st=sst;
1069 Message(M_INFO,"%s: route dump requested by SIGUSR1\n",st->name);
1070 netlink_dump_routes(st,True);
/* Installed as a client's ops.set_mtu callback; sst is the struct
   netlink_client.
   NOTE(review): the statement that uses new_mtu is not visible here. */
1073 static void netlink_inst_set_mtu(void *sst, int32_t new_mtu)
1075 struct netlink_client *c=sst;
/* Installed as a client's ops.reg callback; sst is the struct
   netlink_client.  Reports the netlink's own MTU back to the caller
   through *localmtu_r.
   NOTE(review): deliver and dst are presumably stored in the client
   for later delivery; those lines are not visible here. */
1080 static void netlink_inst_reg(void *sst, netlink_deliver_fn *deliver,
1081 void *dst, uint32_t *localmtu_r)
1083 struct netlink_client *c=sst;
1084 struct netlink *st=c->nst;
1090 *localmtu_r=st->mtu;
/* Maps the strings accepted in a client's "options" list to option
   bits; netlink_inst_create() passes it to string_list_to_word(). */
1093 static struct flagstr netlink_option_table[]={
1094 { "soft", OPT_SOFTROUTE },
1095 { "allow-route", OPT_ALLOWROUTE },
1098 /* This is the routine that gets called when the closure that's
1099 returned by an invocation of a netlink device closure (eg. tun,
1100 userv-ipif) is invoked. It's used to create routes and pass in
1101 information about them; the closure it returns is used by site
/* Create a new client (route set) of netlink st from the configuration
   dictionary dict.
   Reads "name" (defaulting to st->name), the required "routes" list,
   the "options" list, and the optional numbers "priority" and "mtu".
   Configuration errors are fatal: a missing "routes", soft routes on a
   device without a set_routes callback, the option combination
   rejected for point-to-point netlinks, and routes that fail the
   ipset_is_subset() check against st->remote_networks.  Soft routes
   set require_root_privileges.
   The new client is allocated, its closure and ops callbacks are
   filled in, its MTU defaults to st->mtu when "mtu" is 0 or absent,
   and it is pushed onto the front of the st->clients list. */
1103 static closure_t *netlink_inst_create(struct netlink *st,
1104 struct cloc loc, dict_t *dict)
1106 struct netlink_client *c;
1108 struct ipset *networks;
1109 uint32_t options,priority;
1113 name=dict_read_string(dict, "name", True, st->name, loc);
1115 l=dict_lookup(dict,"routes");
1117 cfgfatal(loc,st->name,"required parameter \"routes\" not found\n");
1118 networks=string_list_to_ipset(l,loc,st->name,"routes");
1119 options=string_list_to_word(dict_lookup(dict,"options"),
1120 netlink_option_table,st->name);
1122 priority=dict_read_number(dict,"priority",False,st->name,loc,0);
1123 mtu=dict_read_number(dict,"mtu",False,st->name,loc,0);
1125 if ((options&OPT_SOFTROUTE) && !st->set_routes) {
1126 cfgfatal(loc,st->name,"this netlink device does not support "
1131 if (options&OPT_SOFTROUTE) {
1132 /* XXX for now we assume that soft routes require root privilege;
1133 this may not always be true. The device driver can tell us. */
1134 require_root_privileges=True;
1135 require_root_privileges_explanation="netlink: soft routes";
1137 cfgfatal(loc,st->name,"point-to-point netlinks do not support "
1143 /* Check that nets are a subset of st->remote_networks;
1144 refuse to register if they are not. */
1145 if (!ipset_is_subset(st->remote_networks,networks)) {
1146 cfgfatal(loc,st->name,"routes are not allowed\n");
1150 c=safe_malloc(sizeof(*c),"netlink_inst_create");
1151 c->cl.description=name;
1152 c->cl.type=CL_NETLINK;
1154 c->cl.interface=&c->ops;
1156 c->ops.reg=netlink_inst_reg;
1157 c->ops.deliver=netlink_inst_incoming;
1158 c->ops.set_quality=netlink_set_quality;
1159 c->ops.set_mtu=netlink_inst_set_mtu;
1162 c->networks=networks;
1163 c->subnets=ipset_to_subnet_list(networks);
1164 c->priority=priority;
1168 c->link_quality=LINK_QUALITY_UNUSED;
1169 c->mtu=mtu?mtu:st->mtu;
1174 c->next=st->clients;
1176 assert(st->n_clients < INT_MAX);
/* Apply function of a netlink device closure (installed as
   st->cl.apply by netlink_init()).  The first argument must be a
   dictionary, otherwise the configuration error is fatal.  A client is
   created from it with netlink_inst_create() and returned wrapped by
   new_closure(). */
1182 static list_t *netlink_inst_apply(closure_t *self, struct cloc loc,
1183 dict_t *context, list_t *args)
1185 struct netlink *st=self->interface;
1191 item=list_elem(args,0);
1192 if (!item || item->type!=t_dict) {
1193 cfgfatal(loc,st->name,"must have a dictionary argument\n");
1195 dict=item->data.dict;
1197 cl=netlink_inst_create(st,loc,dict);
1199 return new_closure(cl);
/* Initialise the generic netlink state st on behalf of a device driver.
   dst is the driver's own state, set_routes and to_host its callbacks
   (stored in st->set_routes and st->deliver_to_host), description the
   closure description and default name.
   Configuration read from dict: optional "name", "networks" and
   "remote-networks" (when a list is absent the complement of an empty
   set is used instead), mandatory "local-address", exactly one of
   "secnet-address" and "ptp-address" (both or neither is a fatal
   configuration error), and "mtu" (default DEFAULT_MTU).
   Also allocates the st->icmp buffer (MAX(ICMP_BUFSIZE, mtu) bytes),
   registers netlink_phase_hook() for PHASE_SETUP and
   netlink_signal_handler() for SIGUSR1.
   Returns netlink_dev_incoming, the function the driver calls with
   packets arriving from the host. */
1202 netlink_deliver_fn *netlink_init(struct netlink *st,
1203 void *dst, struct cloc loc,
1204 dict_t *dict, cstring_t description,
1205 netlink_route_fn *set_routes,
1206 netlink_deliver_fn *to_host)
1212 st->cl.description=description;
1213 st->cl.type=CL_PURE;
1214 st->cl.apply=netlink_inst_apply;
1215 st->cl.interface=st;
1219 st->set_routes=set_routes;
1220 st->deliver_to_host=to_host;
1222 st->name=dict_read_string(dict,"name",False,description,loc);
1223 if (!st->name) st->name=description;
1224 l=dict_lookup(dict,"networks");
1226 st->networks=string_list_to_ipset(l,loc,st->name,"networks");
1228 struct ipset *empty;
1230 st->networks=ipset_complement(empty);
1233 l=dict_lookup(dict,"remote-networks");
1235 st->remote_networks=string_list_to_ipset(l,loc,st->name,
1238 struct ipset *empty;
1240 st->remote_networks=ipset_complement(empty);
1243 st->local_address=string_item_to_ipaddr(
1244 dict_find_item(dict,"local-address", True, "netlink", loc),"netlink");
1246 sa=dict_find_item(dict,"secnet-address",False,"netlink",loc);
1247 ptpa=dict_find_item(dict,"ptp-address",False,"netlink",loc);
1249 cfgfatal(loc,st->name,"you may not specify secnet-address and "
1250 "ptp-address in the same netlink device\n");
1252 if (!(sa || ptpa)) {
1253 cfgfatal(loc,st->name,"you must specify secnet-address or "
1254 "ptp-address for this netlink device\n");
1257 st->secnet_address=string_item_to_ipaddr(sa,"netlink");
1260 st->secnet_address=string_item_to_ipaddr(ptpa,"netlink");
1263 /* To be strictly correct we could subtract secnet_address from
1264 networks here. It shouldn't make any practical difference,
1265 though, and will make the route dump look complicated... */
1266 st->subnets=ipset_to_subnet_list(st->networks);
1267 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
1268 buffer_new(&st->icmp,MAX(ICMP_BUFSIZE,st->mtu));
1272 add_hook(PHASE_SETUP,netlink_phase_hook,st);
1273 request_signal_notification(SIGUSR1, netlink_signal_handler, st);
1275 /* If we're point-to-point then we return a CL_NETLINK directly,
1276 rather than a CL_NETLINK_OLD or pure closure (depending on
1277 compatibility). This CL_NETLINK is for our one and only
1278 client. Our cl.apply function is NULL. */
1281 cl=netlink_inst_create(st,loc,dict);
1284 return netlink_dev_incoming;
1287 /* No connection to the kernel at all... */
/* set_routes callback of the null netlink.  When the tunnel's desired
   state (up) differs from its recorded state (kup) it only logs the
   change and updates kup. */
1293 static bool_t null_set_route(void *sst, struct netlink_client *routes)
1295 struct null *st=sst;
1297 if (routes->up!=routes->kup) {
1298 Message(M_INFO,"%s: setting routes for tunnel %s to state %s\n",
1299 st->nl.name,routes->name,
1300 routes->up?"up":"down");
1301 routes->kup=routes->up;
/* Deliver-to-host callback of the null netlink.
   NOTE(review): the body is not visible in this view. */
1307 static void null_deliver(void *sst, struct buffer_if *buf)
/* Apply function behind the "null-netlink" closure.  Allocates the
   null device state, requires the first argument to be a dictionary
   (fatal configuration error otherwise), initialises the embedded
   netlink with netlink_init() using null_set_route as the route
   callback, and returns the netlink's closure. */
1312 static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
1319 st=safe_malloc(sizeof(*st),"null_apply");
1321 item=list_elem(args,0);
1322 if (!item || item->type!=t_dict)
1323 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
1325 dict=item->data.dict;
1327 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_set_route,
1330 return new_closure(&st->nl.cl);
/* Module entry point: registers the "null-netlink" closure, implemented
   by null_apply(), in dict. */
1333 void netlink_module(dict_t *dict)
1335 add_closure(dict,"null-netlink",null_apply);