1 /* User-kernel network link */
3 /* See RFCs 791, 792, 1123 and 1812 */
5 /* The netlink device is actually a router. Tunnels are unnumbered
6 point-to-point lines (RFC1812 section 2.2.7); the router has a
7 single address (the 'router-id'). */
9 /* This is where we currently have the anti-spoofing paranoia - before
10 sending a packet to the kernel we check that the tunnel it came
11 over could reasonably have produced it. */
14 /* Points to note from RFC1812 (which may require changes in this
17 3.3.4 Maximum Transmission Unit - MTU
19 The MTU of each logical interface MUST be configurable within the
20 range of legal MTUs for the interface.
22 Many Link Layer protocols define a maximum frame size that may be
23 sent. In such cases, a router MUST NOT allow an MTU to be set which
24 would allow sending of frames larger than those allowed by the Link
25 Layer protocol. However, a router SHOULD be willing to receive a
26 packet as large as the maximum frame size even if that is larger than
29 4.2.1 A router SHOULD count datagrams discarded.
31 4.2.2.1 Source route options - we probably should implement processing
32 of source routes, even though mostly the security policy will prevent
35 5.3.13.4 Source Route Options
37 A router MUST implement support for source route options in forwarded
38 packets. A router MAY implement a configuration option that, when
39 enabled, causes all source-routed packets to be discarded. However,
40 such an option MUST NOT be enabled by default.
42 5.3.13.5 Record Route Option
44 Routers MUST support the Record Route option in forwarded packets.
46 A router MAY provide a configuration option that, if enabled, will
47 cause the router to ignore (i.e., pass through unchanged) Record
48 Route options in forwarded packets. If provided, such an option MUST
49 default to enabling the record-route. This option should not affect
50 the processing of Record Route options in datagrams received by the
51 router itself (in particular, Record Route options in ICMP echo
52 requests will still be processed according to Section [4.3.3.6]).
54 5.3.13.6 Timestamp Option
56 Routers MUST support the timestamp option in forwarded packets. A
57 timestamp value MUST follow the rules given [INTRO:2].
59 If the flags field = 3 (timestamp and prespecified address), the
60 router MUST add its timestamp if the next prespecified address
61 matches any of the router's IP addresses. It is not necessary that
62 the prespecified address be either the address of the interface on
63 which the packet arrived or the address of the interface over which
67 4.2.2.7 Fragmentation: RFC 791 Section 3.2
69 Fragmentation, as described in [INTERNET:1], MUST be supported by a
72 4.2.2.8 Reassembly: RFC 791 Section 3.2
74 As specified in the corresponding section of [INTRO:2], a router MUST
75 support reassembly of datagrams that it delivers to itself.
77 4.2.2.9 Time to Live: RFC 791 Section 3.2
79 Note in particular that a router MUST NOT check the TTL of a packet
80 except when forwarding it.
82 A router MUST NOT discard a datagram just because it was received
83 with TTL equal to zero or one; if it is to the router and otherwise
84 valid, the router MUST attempt to receive it.
86 On messages the router originates, the IP layer MUST provide a means
87 for the transport layer to set the TTL field of every datagram that
88 is sent. When a fixed TTL value is used, it MUST be configurable.
91 8.1 The Simple Network Management Protocol - SNMP
92 8.1.1 SNMP Protocol Elements
94 Routers MUST be manageable by SNMP [MGT:3]. The SNMP MUST operate
95 using UDP/IP as its transport and network protocols.
110 #define MDEBUG(...) Message(M_DEBUG, __VA_ARGS__)
111 #else /* !NETLINK_DEBUG */
112 #define MDEBUG(...) ((void)0)
113 #endif /* !NETLINK_DEBUG */
/* ICMP message type numbers used in this file. Each ICMP_CODE_* value is
   listed directly after the ICMP_TYPE_* it is paired with. */
115 #define ICMP_TYPE_ECHO_REPLY 0
/* Destination unreachable, and its codes */
117 #define ICMP_TYPE_UNREACHABLE 3
118 #define ICMP_CODE_NET_UNREACHABLE 0
119 #define ICMP_CODE_PROTOCOL_UNREACHABLE 2
120 #define ICMP_CODE_FRAGMENTATION_REQUIRED 4
121 #define ICMP_CODE_NET_PROHIBITED 13
123 #define ICMP_TYPE_ECHO_REQUEST 8
/* Time exceeded, and its code */
125 #define ICMP_TYPE_TIME_EXCEEDED 11
126 #define ICMP_CODE_TTL_EXCEEDED 0
128 /* Generic IP checksum routine */
129 static inline uint16_t ip_csum(const uint8_t *iph,int32_t count)
131 register uint32_t sum=0;
134 sum+=ntohs(*(uint16_t *)iph);
139 sum+=*(uint8_t *)iph;
141 sum=(sum&0xffff)+(sum>>16);
147 * This is a version of ip_compute_csum() optimized for IP headers,
148 * which always checksum on 4 octet boundaries.
150 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
153 static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl) {
156 __asm__ __volatile__(
162 "adcl 12(%1), %0 ;\n"
163 "1: adcl 16(%1), %0 ;\n"
174 /* Since the input registers which are loaded with iph and ihl
175 are modified, we must also specify them as outputs, or gcc
176 will assume they contain their original values. */
177 : "=r" (sum), "=r" (iph), "=r" (ihl)
178 : "1" (iph), "2" (ihl)
183 static inline uint16_t ip_fast_csum(uint8_t *iph, int32_t ihl)
185 assert(ihl < INT_MAX/4);
186 return ip_csum(iph,ihl*4);
191 #if defined (WORDS_BIGENDIAN)
202 #define IPHDR_FRAG_OFF ((uint16_t)0x1fff)
203 #define IPHDR_FRAG_MORE ((uint16_t)0x2000)
204 #define IPHDR_FRAG_DONT ((uint16_t)0x4000)
205 /* reserved 0x8000 */
211 /* The options start here. */
219 union icmpinfofield {
234 static const union icmpinfofield icmp_noinfo;
236 static void netlink_packet_deliver(struct netlink *st,
237 struct netlink_client *client,
238 struct buffer_if *buf);
240 /* XXX RFC1812 4.3.2.5:
241 All other ICMP error messages (Destination Unreachable,
242 Redirect, Time Exceeded, and Parameter Problem) SHOULD have their
243 precedence value set to 6 (INTERNETWORK CONTROL) or 7 (NETWORK
244 CONTROL). The IP Precedence value for these error messages MAY be
247 static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
248 uint32_t dest,uint16_t len)
252 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
253 buffer_init(&st->icmp,calculate_max_start_pad());
254 h=buf_append(&st->icmp,sizeof(*h));
259 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
262 h->iph.ttl=255; /* XXX should be configurable */
264 h->iph.saddr=htonl(st->secnet_address);
265 h->iph.daddr=htonl(dest);
267 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
274 /* Fill in the ICMP checksum field correctly */
275 static void netlink_icmp_csum(struct icmphdr *h)
279 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
281 h->check=ip_csum(&h->type,len);
285 * An ICMP error message MUST NOT be sent as the result of
288 * * an ICMP error message, or
290 * * a datagram destined to an IP broadcast or IP multicast
293 * * a datagram sent as a link-layer broadcast, or
295 * * a non-initial fragment, or
297 * * a datagram whose source address does not define a single
298 * host -- e.g., a zero address, a loopback address, a
299 * broadcast address, a multicast address, or a Class E
302 static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
305 struct icmphdr *icmph;
308 if (buf->size < (int)sizeof(struct icmphdr)) return False;
309 iph=(struct iphdr *)buf->start;
310 icmph=(struct icmphdr *)buf->start;
311 if (iph->protocol==1) {
312 switch(icmph->type) {
313 /* Based on http://www.iana.org/assignments/icmp-parameters/icmp-parameters.xhtml#icmp-parameters-types
314 * as retrieved Thu, 20 Mar 2014 00:16:44 +0000.
315 * Deprecated, reserved, unassigned and experimental
316 * options are treated as not safe to reply to.
318 case 0: /* Echo Reply */
320 case 13: /* Timestamp */
321 case 14: /* Timestamp Reply */
327 /* How do we spot broadcast destination addresses? */
328 if (ntohs(iph->frag)&IPHDR_FRAG_OFF) return False;
329 source=ntohl(iph->saddr);
330 if (source==0) return False;
331 if ((source&0xff000000)==0x7f000000) return False;
332 /* How do we spot broadcast source addresses? */
333 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
334 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
338 /* How much of the original IP packet do we include in its ICMP
339 response? The header plus up to 64 bits. */
342 4.3.2.3 Original Message Header
344 Historically, every ICMP error message has included the Internet
345 header and at least the first 8 data bytes of the datagram that
346 triggered the error. This is no longer adequate, due to the use of
347 IP-in-IP tunneling and other technologies. Therefore, the ICMP
348 datagram SHOULD contain as much of the original datagram as possible
349 without the length of the ICMP datagram exceeding 576 bytes. The
350 returned IP header (and user data) MUST be identical to that which
351 was received, except that the router is not required to undo any
352 modifications to the IP header that are normally performed in
353 forwarding that were performed before the error was detected (e.g.,
354 decrementing the TTL, or updating options). Note that the
355 requirements of Section [4.3.3.5] supersede this requirement in some
356 cases (i.e., for a Parameter Problem message, if the problem is in a
357 modified field, the router must undo the modification). See Section
360 static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
362 if (buf->size < (int)sizeof(struct iphdr)) return 0;
363 struct iphdr *iph=(struct iphdr *)buf->start;
367 /* We include the first 8 bytes of the packet data, provided they exist */
369 plen=ntohs(iph->tot_len);
370 return (hlen>plen?plen:hlen);
373 /* May send an ICMP message (type, code, info) about the packet in buf. There
374 is no 'client' argument; the reply is delivered with client==NULL. */
375 static void netlink_icmp_simple(struct netlink *st, struct buffer_if *buf,
376 uint8_t type, uint8_t code,
377 union icmpinfofield info)
382 if (netlink_icmp_may_reply(buf)) {
383 struct iphdr *iph=(struct iphdr *)buf->start;
384 len=netlink_icmp_reply_len(buf);
385 h=netlink_icmp_tmpl(st,ntohl(iph->saddr),len);
386 h->type=type; h->code=code; h->d=info;
387 memcpy(buf_append(&st->icmp,len),buf->start,len);
388 netlink_icmp_csum(h);
389 netlink_packet_deliver(st,NULL,&st->icmp);
390 BUF_ASSERT_FREE(&st->icmp);
395 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
397 * RFC1812: 4.2.2.5 MUST discard messages containing invalid checksums.
399 * Is the datagram acceptable?
401 * 1. Length at least the size of an ip header
403 * 3. Checksums correctly.
404 * 4. Doesn't have a bogus length
406 static bool_t netlink_check(struct netlink *st, struct buffer_if *buf,
407 char *errmsgbuf, int errmsgbuflen)
409 #define BAD(...) do{ \
410 snprintf(errmsgbuf,errmsgbuflen,__VA_ARGS__); \
414 if (buf->size < (int)sizeof(struct iphdr)) BAD("len %"PRIu32"",buf->size);
415 struct iphdr *iph=(struct iphdr *)buf->start;
418 if (iph->ihl < 5) BAD("ihl %u",iph->ihl);
419 if (iph->version != 4) BAD("version %u",iph->version);
420 if (buf->size < iph->ihl*4) BAD("size %"PRId32"<%u*4",buf->size,iph->ihl);
421 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) BAD("csum");
422 len=ntohs(iph->tot_len);
423 /* There should be no padding */
424 if (buf->size!=len) BAD("len %"PRId32"!=%"PRId32,buf->size,len);
425 if (len<(iph->ihl<<2)) BAD("len %"PRId32"<(%u<<2)",len,iph->ihl);
426 /* XXX check that there's no source route specified */
432 /* Deliver a packet _to_ client; used after we have decided
433 * what to do with it (and just to check that the client has
434 * actually registered a delivery function with us). */
435 static void netlink_client_deliver(struct netlink *st,
436 struct netlink_client *client,
437 uint32_t source, uint32_t dest,
438 struct buffer_if *buf)
440 if (!client->deliver) {
442 s=ipaddr_to_string(source);
443 d=ipaddr_to_string(dest);
444 Message(M_ERR,"%s: dropping %s->%s, client not registered\n",
450 client->deliver(client->dst, buf);
454 /* Deliver a packet to the host; used after we have decided that that
455 * is what to do with it. */
456 static void netlink_host_deliver(struct netlink *st,
457 uint32_t source, uint32_t dest,
458 struct buffer_if *buf)
460 st->deliver_to_host(st->dst,buf);
464 /* Deliver a packet. "client" is the _origin_ of the packet, not its
465 destination, and is NULL for packets from the host and packets
466 generated internally in secnet. */
467 static void netlink_packet_deliver(struct netlink *st,
468 struct netlink_client *client,
469 struct buffer_if *buf)
471 if (buf->size < (int)sizeof(struct iphdr)) {
472 Message(M_ERR,"%s: trying to deliver a too-short packet"
473 " from %s!\n",st->name, client?client->name:"(local)");
478 struct iphdr *iph=(struct iphdr *)buf->start;
479 uint32_t dest=ntohl(iph->daddr);
480 uint32_t source=ntohl(iph->saddr);
481 uint32_t best_quality;
482 bool_t allow_route=False;
483 bool_t found_allowed=False;
487 BUF_ASSERT_USED(buf);
489 if (dest==st->secnet_address) {
490 Message(M_ERR,"%s: trying to deliver a packet to myself!\n",st->name);
495 /* Packets from the host (client==NULL) may always be routed. Packets
496 from clients with the allow_route option will also be routed. */
497 if (!client || (client && (client->options & OPT_ALLOWROUTE)))
500 /* If !allow_route, we check the routing table anyway, and if
501 there's a suitable route with OPT_ALLOWROUTE set we use it. If
502 there's a suitable route, but none with OPT_ALLOWROUTE set then
503 we generate ICMP 'communication with destination network
504 administratively prohibited'. */
508 for (i=0; i<st->n_clients; i++) {
509 if (st->routes[i]->up &&
510 ipset_contains_addr(st->routes[i]->networks,dest)) {
511 /* It's an available route to the correct destination. But is
512 it better than the one we already have? */
514 /* If we have already found an allowed route then we don't
515 bother looking at routes we're not allowed to use. If
516 we don't yet have an allowed route we'll consider any. */
517 if (!allow_route && found_allowed) {
518 if (!(st->routes[i]->options&OPT_ALLOWROUTE)) continue;
521 if (st->routes[i]->link_quality>best_quality
522 || best_quality==0) {
523 best_quality=st->routes[i]->link_quality;
525 if (st->routes[i]->options&OPT_ALLOWROUTE)
527 /* If quality isn't perfect we may wish to
528 consider kicking the tunnel with a 0-length
529 packet to prompt it to perform a key setup.
530 Then it'll eventually decide it's up or
532 /* If quality is perfect and we're allowed to use the
533 route we don't need to search any more. */
534 if (best_quality>=MAXIMUM_LINK_QUALITY &&
535 (allow_route || found_allowed)) break;
539 if (best_match==-1) {
540 /* The packet's not going down a tunnel. It might (ought to)
542 if (ipset_contains_addr(st->networks,dest)) {
543 netlink_host_deliver(st,source,dest,buf);
544 BUF_ASSERT_FREE(buf);
547 s=ipaddr_to_string(source);
548 d=ipaddr_to_string(dest);
549 Message(M_DEBUG,"%s: don't know where to deliver packet "
550 "(s=%s, d=%s)\n", st->name, s, d);
552 netlink_icmp_simple(st,buf,ICMP_TYPE_UNREACHABLE,
553 ICMP_CODE_NET_UNREACHABLE, icmp_noinfo);
558 !(st->routes[best_match]->options&OPT_ALLOWROUTE)) {
560 s=ipaddr_to_string(source);
561 d=ipaddr_to_string(dest);
562 /* We have a usable route but aren't allowed to use it.
563 Generate ICMP destination unreachable: communication
564 with destination network administratively prohibited */
565 Message(M_NOTICE,"%s: denied forwarding for packet (s=%s, d=%s)\n",
569 netlink_icmp_simple(st,buf,ICMP_TYPE_UNREACHABLE,
570 ICMP_CODE_NET_PROHIBITED, icmp_noinfo);
573 if (best_quality>0) {
574 /* XXX Fragment if required */
575 netlink_client_deliver(st,st->routes[best_match],
577 BUF_ASSERT_FREE(buf);
579 /* Generate ICMP destination unreachable */
580 netlink_icmp_simple(st,buf,
581 ICMP_TYPE_UNREACHABLE,
582 ICMP_CODE_NET_UNREACHABLE,
588 BUF_ASSERT_FREE(buf);
591 static void netlink_packet_forward(struct netlink *st,
592 struct netlink_client *client,
593 struct buffer_if *buf)
595 if (buf->size < (int)sizeof(struct iphdr)) return;
596 struct iphdr *iph=(struct iphdr *)buf->start;
598 BUF_ASSERT_USED(buf);
600 /* Packet has already been checked */
602 /* Generate ICMP time exceeded */
603 netlink_icmp_simple(st,buf,ICMP_TYPE_TIME_EXCEEDED,
604 ICMP_CODE_TTL_EXCEEDED,icmp_noinfo);
610 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
612 netlink_packet_deliver(st,client,buf);
613 BUF_ASSERT_FREE(buf);
616 /* Deal with packets addressed explicitly to us */
617 static void netlink_packet_local(struct netlink *st,
618 struct netlink_client *client,
619 struct buffer_if *buf)
625 if (buf->size < (int)sizeof(struct icmphdr)) {
626 Message(M_WARNING,"%s: short packet addressed to secnet; "
627 "ignoring it\n",st->name);
631 h=(struct icmphdr *)buf->start;
633 if ((ntohs(h->iph.frag)&(IPHDR_FRAG_OFF|IPHDR_FRAG_MORE))!=0) {
634 Message(M_WARNING,"%s: fragmented packet addressed to secnet; "
635 "ignoring it\n",st->name);
640 if (h->iph.protocol==1) {
642 if (h->type==ICMP_TYPE_ECHO_REQUEST && h->code==0) {
643 /* ICMP echo-request. Special case: we re-use the buffer
644 to construct the reply. */
645 h->type=ICMP_TYPE_ECHO_REPLY;
646 h->iph.daddr=h->iph.saddr;
647 h->iph.saddr=htonl(st->secnet_address);
650 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
651 netlink_icmp_csum(h);
652 netlink_packet_deliver(st,NULL,buf);
655 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
657 /* Send ICMP protocol unreachable */
658 netlink_icmp_simple(st,buf,ICMP_TYPE_UNREACHABLE,
659 ICMP_CODE_PROTOCOL_UNREACHABLE,icmp_noinfo);
667 /* If client==NULL packet is from host, otherwise client specifies which tunnel
669 static void netlink_incoming(struct netlink *st, struct netlink_client *client,
670 struct buffer_if *buf)
672 uint32_t source,dest;
675 const char *sourcedesc=client?client->name:"host";
677 BUF_ASSERT_USED(buf);
679 if (!netlink_check(st,buf,errmsgbuf,sizeof(errmsgbuf))) {
680 Message(M_WARNING,"%s: bad IP packet from %s: %s\n",
686 assert(buf->size >= (int)sizeof(struct icmphdr));
687 iph=(struct iphdr *)buf->start;
689 source=ntohl(iph->saddr);
690 dest=ntohl(iph->daddr);
692 /* Check source. If we don't like the source, there's no point
693 generating ICMP because we won't know how to get it to the
694 source of the packet. */
696 /* Check that the packet source is appropriate for the tunnel
698 if (!ipset_contains_addr(client->networks,source)) {
700 s=ipaddr_to_string(source);
701 d=ipaddr_to_string(dest);
702 Message(M_WARNING,"%s: packet from tunnel %s with bad "
703 "source address (s=%s,d=%s)\n",st->name,client->name,s,d);
709 /* Check that the packet originates in our configured local
710 network, and hasn't been forwarded from elsewhere or
711 generated with the wrong source address */
712 if (!ipset_contains_addr(st->networks,source)) {
714 s=ipaddr_to_string(source);
715 d=ipaddr_to_string(dest);
716 Message(M_WARNING,"%s: outgoing packet with bad source address "
717 "(s=%s,d=%s)\n",st->name,s,d);
724 /* If this is a point-to-point device we don't examine the
725 destination address at all; we blindly send it down our
726 one-and-only registered tunnel, or to the host, depending on
727 where it came from. It's up to external software to check
728 address validity and generate ICMP, etc. */
731 netlink_host_deliver(st,source,dest,buf);
733 netlink_client_deliver(st,st->clients,source,dest,buf);
735 BUF_ASSERT_FREE(buf);
739 /* st->secnet_address needs checking before matching destination
741 if (dest==st->secnet_address) {
742 netlink_packet_local(st,client,buf);
743 BUF_ASSERT_FREE(buf);
746 netlink_packet_forward(st,client,buf);
747 BUF_ASSERT_FREE(buf);
750 static void netlink_inst_incoming(void *sst, struct buffer_if *buf)
752 struct netlink_client *c=sst;
753 struct netlink *st=c->nst;
755 netlink_incoming(st,c,buf);
758 static void netlink_dev_incoming(void *sst, struct buffer_if *buf)
760 struct netlink *st=sst;
762 netlink_incoming(st,NULL,buf);
765 static void netlink_set_quality(void *sst, uint32_t quality)
767 struct netlink_client *c=sst;
768 struct netlink *st=c->nst;
770 c->link_quality=quality;
771 c->up=(c->link_quality==LINK_QUALITY_DOWN)?False:True;
772 if (c->options&OPT_SOFTROUTE) {
773 st->set_routes(st->dst,c);
777 static void netlink_output_subnets(struct netlink *st, uint32_t loglevel,
778 struct subnet_list *snets)
783 for (i=0; i<snets->entries; i++) {
784 net=subnet_to_string(snets->list[i]);
785 Message(loglevel,"%s ",net);
790 static void netlink_dump_routes(struct netlink *st, bool_t requested)
796 if (requested) c=M_WARNING;
798 net=ipaddr_to_string(st->secnet_address);
799 Message(c,"%s: point-to-point (remote end is %s); routes: ",
802 netlink_output_subnets(st,c,st->clients->subnets);
805 Message(c,"%s: routing table:\n",st->name);
806 for (i=0; i<st->n_clients; i++) {
807 netlink_output_subnets(st,c,st->routes[i]->subnets);
808 Message(c,"-> tunnel %s (%s,mtu %d,%s routes,%s,"
809 "quality %d,use %d,pri %lu)\n",
811 st->routes[i]->up?"up":"down",
813 st->routes[i]->options&OPT_SOFTROUTE?"soft":"hard",
814 st->routes[i]->options&OPT_ALLOWROUTE?"free":"restricted",
815 st->routes[i]->link_quality,
816 st->routes[i]->outcount,
817 (unsigned long)st->routes[i]->priority);
819 net=ipaddr_to_string(st->secnet_address);
820 Message(c,"%s/32 -> netlink \"%s\" (use %d)\n",
821 net,st->name,st->localcount);
823 for (i=0; i<st->subnets->entries; i++) {
824 net=subnet_to_string(st->subnets->list[i]);
825 Message(c,"%s ",net);
829 Message(c,"-> host (use %d)\n",st->outcount);
833 /* ap is a pointer to a member of the routes array */
834 static int netlink_compare_client_priority(const void *ap, const void *bp)
836 const struct netlink_client *const*a=ap;
837 const struct netlink_client *const*b=bp;
839 if ((*a)->priority==(*b)->priority) return 0;
840 if ((*a)->priority<(*b)->priority) return 1;
844 static void netlink_phase_hook(void *sst, uint32_t new_phase)
846 struct netlink *st=sst;
847 struct netlink_client *c;
850 /* All the networks serviced by the various tunnels should now
851 * have been registered. We build a routing table by sorting the
852 * clients by priority. */
853 st->routes=safe_malloc_ary(sizeof(*st->routes),st->n_clients,
854 "netlink_phase_hook");
857 for (c=st->clients; c; c=c->next) {
861 /* Sort the table in descending order of priority */
862 qsort(st->routes,st->n_clients,sizeof(*st->routes),
863 netlink_compare_client_priority);
865 netlink_dump_routes(st,False);
868 static void netlink_signal_handler(void *sst, int signum)
870 struct netlink *st=sst;
871 Message(M_INFO,"%s: route dump requested by SIGUSR1\n",st->name);
872 netlink_dump_routes(st,True);
875 static void netlink_inst_set_mtu(void *sst, int32_t new_mtu)
877 struct netlink_client *c=sst;
882 static void netlink_inst_reg(void *sst, netlink_deliver_fn *deliver,
885 struct netlink_client *c=sst;
891 static struct flagstr netlink_option_table[]={
892 { "soft", OPT_SOFTROUTE },
893 { "allow-route", OPT_ALLOWROUTE },
896 /* This is the routine that gets called when the closure that's
897 returned by an invocation of a netlink device closure (eg. tun,
898 userv-ipif) is invoked. It's used to create routes and pass in
899 information about them; the closure it returns is used by site
901 static closure_t *netlink_inst_create(struct netlink *st,
902 struct cloc loc, dict_t *dict)
904 struct netlink_client *c;
906 struct ipset *networks;
907 uint32_t options,priority;
911 name=dict_read_string(dict, "name", True, st->name, loc);
913 l=dict_lookup(dict,"routes");
915 cfgfatal(loc,st->name,"required parameter \"routes\" not found\n");
916 networks=string_list_to_ipset(l,loc,st->name,"routes");
917 options=string_list_to_word(dict_lookup(dict,"options"),
918 netlink_option_table,st->name);
920 priority=dict_read_number(dict,"priority",False,st->name,loc,0);
921 mtu=dict_read_number(dict,"mtu",False,st->name,loc,0);
923 if ((options&OPT_SOFTROUTE) && !st->set_routes) {
924 cfgfatal(loc,st->name,"this netlink device does not support "
929 if (options&OPT_SOFTROUTE) {
930 /* XXX for now we assume that soft routes require root privilege;
931 this may not always be true. The device driver can tell us. */
932 require_root_privileges=True;
933 require_root_privileges_explanation="netlink: soft routes";
935 cfgfatal(loc,st->name,"point-to-point netlinks do not support "
941 /* Check that nets are a subset of st->remote_networks;
942 refuse to register if they are not. */
943 if (!ipset_is_subset(st->remote_networks,networks)) {
944 cfgfatal(loc,st->name,"routes are not allowed\n");
948 c=safe_malloc(sizeof(*c),"netlink_inst_create");
949 c->cl.description=name;
950 c->cl.type=CL_NETLINK;
952 c->cl.interface=&c->ops;
954 c->ops.reg=netlink_inst_reg;
955 c->ops.deliver=netlink_inst_incoming;
956 c->ops.set_quality=netlink_set_quality;
957 c->ops.set_mtu=netlink_inst_set_mtu;
960 c->networks=networks;
961 c->subnets=ipset_to_subnet_list(networks);
962 c->priority=priority;
966 c->link_quality=LINK_QUALITY_UNUSED;
967 c->mtu=mtu?mtu:st->mtu;
974 assert(st->n_clients < INT_MAX);
980 static list_t *netlink_inst_apply(closure_t *self, struct cloc loc,
981 dict_t *context, list_t *args)
983 struct netlink *st=self->interface;
989 item=list_elem(args,0);
990 if (!item || item->type!=t_dict) {
991 cfgfatal(loc,st->name,"must have a dictionary argument\n");
993 dict=item->data.dict;
995 cl=netlink_inst_create(st,loc,dict);
997 return new_closure(cl);
1000 netlink_deliver_fn *netlink_init(struct netlink *st,
1001 void *dst, struct cloc loc,
1002 dict_t *dict, cstring_t description,
1003 netlink_route_fn *set_routes,
1004 netlink_deliver_fn *to_host)
1010 st->cl.description=description;
1011 st->cl.type=CL_PURE;
1012 st->cl.apply=netlink_inst_apply;
1013 st->cl.interface=st;
1017 st->set_routes=set_routes;
1018 st->deliver_to_host=to_host;
1020 st->name=dict_read_string(dict,"name",False,description,loc);
1021 if (!st->name) st->name=description;
1022 l=dict_lookup(dict,"networks");
1024 st->networks=string_list_to_ipset(l,loc,st->name,"networks");
1026 struct ipset *empty;
1028 st->networks=ipset_complement(empty);
1031 l=dict_lookup(dict,"remote-networks");
1033 st->remote_networks=string_list_to_ipset(l,loc,st->name,
1036 struct ipset *empty;
1038 st->remote_networks=ipset_complement(empty);
1042 sa=dict_find_item(dict,"secnet-address",False,"netlink",loc);
1043 ptpa=dict_find_item(dict,"ptp-address",False,"netlink",loc);
1045 cfgfatal(loc,st->name,"you may not specify secnet-address and "
1046 "ptp-address in the same netlink device\n");
1048 if (!(sa || ptpa)) {
1049 cfgfatal(loc,st->name,"you must specify secnet-address or "
1050 "ptp-address for this netlink device\n");
1053 st->secnet_address=string_item_to_ipaddr(sa,"netlink");
1056 st->secnet_address=string_item_to_ipaddr(ptpa,"netlink");
1059 /* To be strictly correct we could subtract secnet_address from
1060 networks here. It shouldn't make any practical difference,
1061 though, and will make the route dump look complicated... */
1062 st->subnets=ipset_to_subnet_list(st->networks);
1063 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
1064 buffer_new(&st->icmp,ICMP_BUFSIZE);
1068 add_hook(PHASE_SETUP,netlink_phase_hook,st);
1069 request_signal_notification(SIGUSR1, netlink_signal_handler, st);
1071 /* If we're point-to-point then we return a CL_NETLINK directly,
1072 rather than a CL_NETLINK_OLD or pure closure (depending on
1073 compatibility). This CL_NETLINK is for our one and only
1074 client. Our cl.apply function is NULL. */
1077 cl=netlink_inst_create(st,loc,dict);
1080 return netlink_dev_incoming;
1083 /* No connection to the kernel at all... */
1089 static bool_t null_set_route(void *sst, struct netlink_client *routes)
1091 struct null *st=sst;
1093 if (routes->up!=routes->kup) {
1094 Message(M_INFO,"%s: setting routes for tunnel %s to state %s\n",
1095 st->nl.name,routes->name,
1096 routes->up?"up":"down");
1097 routes->kup=routes->up;
1103 static void null_deliver(void *sst, struct buffer_if *buf)
1108 static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
1115 st=safe_malloc(sizeof(*st),"null_apply");
1117 item=list_elem(args,0);
1118 if (!item || item->type!=t_dict)
1119 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
1121 dict=item->data.dict;
1123 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_set_route,
1126 return new_closure(&st->nl.cl);
1129 void netlink_module(dict_t *dict)
1131 add_closure(dict,"null-netlink",null_apply);