1 /* User-kernel network link */
3 /* See RFCs 791, 792, 1123 and 1812 */
5 /* The netlink device is actually a router. Tunnels are unnumbered
6 point-to-point lines (RFC1812 section 2.2.7); the router has a
7 single address (the 'router-id'). */
9 /* This is where we currently have the anti-spoofing paranoia - before
10 sending a packet to the kernel we check that the tunnel it came
11 over could reasonably have produced it. */
14 /* Points to note from RFC1812 (which may require changes in this
17 3.3.4 Maximum Transmission Unit - MTU
19 The MTU of each logical interface MUST be configurable within the
20 range of legal MTUs for the interface.
22 Many Link Layer protocols define a maximum frame size that may be
23 sent. In such cases, a router MUST NOT allow an MTU to be set which
24 would allow sending of frames larger than those allowed by the Link
25 Layer protocol. However, a router SHOULD be willing to receive a
26 packet as large as the maximum frame size even if that is larger than
29 4.2.1 A router SHOULD count datagrams discarded.
31 4.2.2.1 Source route options - we probably should implement processing
32 of source routes, even though mostly the security policy will prevent
35 5.3.13.4 Source Route Options
37 A router MUST implement support for source route options in forwarded
38 packets. A router MAY implement a configuration option that, when
39 enabled, causes all source-routed packets to be discarded. However,
40 such an option MUST NOT be enabled by default.
42 5.3.13.5 Record Route Option
44 Routers MUST support the Record Route option in forwarded packets.
46 A router MAY provide a configuration option that, if enabled, will
47 cause the router to ignore (i.e., pass through unchanged) Record
48 Route options in forwarded packets. If provided, such an option MUST
49 default to enabling the record-route. This option should not affect
50 the processing of Record Route options in datagrams received by the
51 router itself (in particular, Record Route options in ICMP echo
52 requests will still be processed according to Section [4.3.3.6]).
54 5.3.13.6 Timestamp Option
56 Routers MUST support the timestamp option in forwarded packets. A
57 timestamp value MUST follow the rules given [INTRO:2].
59 If the flags field = 3 (timestamp and prespecified address), the
60 router MUST add its timestamp if the next prespecified address
61 matches any of the router's IP addresses. It is not necessary that
62 the prespecified address be either the address of the interface on
63 which the packet arrived or the address of the interface over which
67 4.2.2.7 Fragmentation: RFC 791 Section 3.2
69 Fragmentation, as described in [INTERNET:1], MUST be supported by a
72 4.2.2.8 Reassembly: RFC 791 Section 3.2
74 As specified in the corresponding section of [INTRO:2], a router MUST
75 support reassembly of datagrams that it delivers to itself.
77 4.2.2.9 Time to Live: RFC 791 Section 3.2
79 Note in particular that a router MUST NOT check the TTL of a packet
80 except when forwarding it.
82 A router MUST NOT discard a datagram just because it was received
83 with TTL equal to zero or one; if it is to the router and otherwise
84 valid, the router MUST attempt to receive it.
86 On messages the router originates, the IP layer MUST provide a means
87 for the transport layer to set the TTL field of every datagram that
88 is sent. When a fixed TTL value is used, it MUST be configurable.
91 8.1 The Simple Network Management Protocol - SNMP
92 8.1.1 SNMP Protocol Elements
94 Routers MUST be manageable by SNMP [MGT:3]. The SNMP MUST operate
95 using UDP/IP as its transport and network protocols.
/* ICMP message type values.  Each ICMP_TYPE_* is followed by the
   ICMP_CODE_* values that this file defines for that type. */
109 #define ICMP_TYPE_ECHO_REPLY 0
/* Destination unreachable, and the reasons (codes) defined for it */
111 #define ICMP_TYPE_UNREACHABLE 3
112 #define ICMP_CODE_NET_UNREACHABLE 0
113 #define ICMP_CODE_PROTOCOL_UNREACHABLE 2
114 #define ICMP_CODE_FRAGMENTATION_REQUIRED 4
115 #define ICMP_CODE_NET_PROHIBITED 13
117 #define ICMP_TYPE_ECHO_REQUEST 8
/* Time exceeded, and the code defined for it */
119 #define ICMP_TYPE_TIME_EXCEEDED 11
120 #define ICMP_CODE_TTL_EXCEEDED 0
/* Generic IP checksum routine (the Internet checksum, RFC 1071).
 *
 * iph:   first byte of the data to checksum; no alignment is required.
 * count: number of bytes to checksum.
 *
 * The data is summed as big-endian 16-bit words using one's-complement
 * arithmetic.  The complement of that sum is returned in network byte
 * order, so the result can be stored directly in a header checksum field.
 * Checksumming data that already contains a correct checksum returns 0.
 *
 * An odd trailing byte is padded on the right with a zero byte, i.e. it
 * is the HIGH octet of the last word.  Adding it as the low octet would
 * give a wrong checksum for odd-length data.  Bytes are combined by hand
 * rather than read through a uint16_t pointer, which keeps the const
 * qualifier and avoids unaligned 16-bit loads. */
static inline uint16_t ip_csum(const uint8_t *iph,int32_t count)
{
    uint32_t sum=0;

    while (count>1) {
        sum+=((uint32_t)iph[0]<<8)|(uint32_t)iph[1];
        iph+=2;
        count-=2;
    }
    if (count>0) {
        /* lone final byte: high octet of a zero-padded word */
        sum+=(uint32_t)iph[0]<<8;
    }
    /* Fold the carries back into the low 16 bits (twice, because the
       first fold can itself produce a carry). */
    sum=(sum&0xffff)+(sum>>16);
    sum+=(sum>>16);
    return htons((uint16_t)~sum);
}
141 * This is a version of ip_compute_csum() optimized for IP headers,
142 * which always checksum on 4 octet boundaries.
144 * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
147 static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl) {
150 __asm__ __volatile__(
156 "adcl 12(%1), %0 ;\n"
157 "1: adcl 16(%1), %0 ;\n"
168 /* Since the input registers which are loaded with iph and ihl
169 are modified, we must also specify them as outputs, or gcc
170 will assume they contain their original values. */
171 : "=r" (sum), "=r" (iph), "=r" (ihl)
172 : "1" (iph), "2" (ihl)
/* ip_fast_csum() implemented on top of the generic ip_csum().
 *
 * iph: start of the IP header.  It is const-qualified because the header
 *      is only read; this matches ip_csum() and the signature of the
 *      assembler variant of this function.
 * ihl: header length in 32-bit words, so ihl*4 bytes are checksummed.
 *
 * Returns whatever ip_csum() returns for those bytes.  The assert keeps
 * ihl*4 from overflowing before it is passed on. */
static inline uint16_t ip_fast_csum(const uint8_t *iph, int32_t ihl)
{
    assert(ihl < INT_MAX/4);
    return ip_csum(iph,ihl*4);
}
185 #if defined (WORDS_BIGENDIAN)
196 #define IPHDR_FRAG_OFF ((uint16_t)0x1fff)
197 #define IPHDR_FRAG_MORE ((uint16_t)0x2000)
198 #define IPHDR_FRAG_DONT ((uint16_t)0x4000)
199 /* reserved 0x8000 */
205 /* The options start here. */
213 union icmpinfofield {
228 static const union icmpinfofield icmp_noinfo;
/* Forward declaration: netlink_icmp_simple() calls
   netlink_packet_deliver() before the point in this file where it is
   defined. */
230 static void netlink_packet_deliver(struct netlink *st,
231 struct netlink_client *client,
232 struct buffer_if *buf);
234 /* XXX RFC1812 4.3.2.5:
235 All other ICMP error messages (Destination Unreachable,
236 Redirect, Time Exceeded, and Parameter Problem) SHOULD have their
237 precedence value set to 6 (INTERNETWORK CONTROL) or 7 (NETWORK
238 CONTROL). The IP Precedence value for these error messages MAY be
241 static struct icmphdr *netlink_icmp_tmpl(struct netlink *st,
242 uint32_t dest,uint16_t len)
246 BUF_ALLOC(&st->icmp,"netlink_icmp_tmpl");
247 buffer_init(&st->icmp,calculate_max_start_pad());
248 h=buf_append(&st->icmp,sizeof(*h));
253 h->iph.tot_len=htons(len+(h->iph.ihl*4)+8);
256 h->iph.ttl=255; /* XXX should be configurable */
258 h->iph.saddr=htonl(st->secnet_address);
259 h->iph.daddr=htonl(dest);
261 h->iph.check=ip_fast_csum((uint8_t *)&h->iph,h->iph.ihl);
268 /* Fill in the ICMP checksum field correctly */
/* The length that is checksummed is the IP total length minus the IP
   header length (ihl is multiplied by 4 to turn it into bytes), and the
   checksum is computed by ip_csum() starting at the ICMP type field. */
269 static void netlink_icmp_csum(struct icmphdr *h)
273 len=ntohs(h->iph.tot_len)-(4*h->iph.ihl);
275 h->check=ip_csum(&h->type,len);
279 * An ICMP error message MUST NOT be sent as the result of
282 * * an ICMP error message, or
284 * * a datagram destined to an IP broadcast or IP multicast
287 * * a datagram sent as a link-layer broadcast, or
289 * * a non-initial fragment, or
291 * * a datagram whose source address does not define a single
292 * host -- e.g., a zero address, a loopback address, a
293 * broadcast address, a multicast address, or a Class E
296 static bool_t netlink_icmp_may_reply(struct buffer_if *buf)
299 struct icmphdr *icmph;
302 if (buf->size < (int)sizeof(struct icmphdr)) return False;
303 iph=(struct iphdr *)buf->start;
304 icmph=(struct icmphdr *)buf->start;
305 if (iph->protocol==1) {
306 switch(icmph->type) {
307 /* Based on http://www.iana.org/assignments/icmp-parameters/icmp-parameters.xhtml#icmp-parameters-types
308 * as retrieved Thu, 20 Mar 2014 00:16:44 +0000.
309 * Deprecated, reserved, unassigned and experimental
310 * options are treated as not safe to reply to.
312 case 0: /* Echo Reply */
314 case 13: /* Timestamp */
315 case 14: /* Timestamp Reply */
321 /* How do we spot broadcast destination addresses? */
322 if (ntohs(iph->frag)&IPHDR_FRAG_OFF) return False;
323 source=ntohl(iph->saddr);
324 if (source==0) return False;
325 if ((source&0xff000000)==0x7f000000) return False;
326 /* How do we spot broadcast source addresses? */
327 if ((source&0xf0000000)==0xe0000000) return False; /* Multicast */
328 if ((source&0xf0000000)==0xf0000000) return False; /* Class E */
332 /* How much of the original IP packet do we include in its ICMP
333 response? The header plus up to 64 bits. */
336 4.3.2.3 Original Message Header
338 Historically, every ICMP error message has included the Internet
339 header and at least the first 8 data bytes of the datagram that
340 triggered the error. This is no longer adequate, due to the use of
341 IP-in-IP tunneling and other technologies. Therefore, the ICMP
342 datagram SHOULD contain as much of the original datagram as possible
343 without the length of the ICMP datagram exceeding 576 bytes. The
344 returned IP header (and user data) MUST be identical to that which
345 was received, except that the router is not required to undo any
346 modifications to the IP header that are normally performed in
347 forwarding that were performed before the error was detected (e.g.,
348 decrementing the TTL, or updating options). Note that the
349 requirements of Section [4.3.3.5] supersede this requirement in some
350 cases (i.e., for a Parameter Problem message, if the problem is in a
351 modified field, the router must undo the modification). See Section
/* Decide how many bytes of the packet in buf are quoted in an ICMP
   message about it.  Returns 0 when buf is too short to hold an IP
   header.  Otherwise returns hlen (presumably the IP header length plus
   8 data bytes, going by the comments here -- confirm), capped at the
   packet's own total length so that no more than the packet is quoted. */
354 static uint16_t netlink_icmp_reply_len(struct buffer_if *buf)
356 if (buf->size < (int)sizeof(struct iphdr)) return 0;
357 struct iphdr *iph=(struct iphdr *)buf->start;
361 /* We include the first 8 bytes of the packet data, provided they exist */
363 plen=ntohs(iph->tot_len);
364 return (hlen>plen?plen:hlen);
367 /* client indicates where the packet we're constructing a response to
368 comes from. NULL indicates the host. */
369 static void netlink_icmp_simple(struct netlink *st, struct buffer_if *buf,
370 uint8_t type, uint8_t code,
371 union icmpinfofield info)
376 if (netlink_icmp_may_reply(buf)) {
377 struct iphdr *iph=(struct iphdr *)buf->start;
378 len=netlink_icmp_reply_len(buf);
379 h=netlink_icmp_tmpl(st,ntohl(iph->saddr),len);
380 h->type=type; h->code=code; h->d=info;
381 memcpy(buf_append(&st->icmp,len),buf->start,len);
382 netlink_icmp_csum(h);
383 netlink_packet_deliver(st,NULL,&st->icmp);
384 BUF_ASSERT_FREE(&st->icmp);
389 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the
391 * RFC1812: 4.2.2.5 MUST discard messages containing invalid checksums.
393 * Is the datagram acceptable?
395 * 1. Length at least the size of an ip header
397 * 3. Checksums correctly.
398 * 4. Doesn't have a bogus length
400 static bool_t netlink_check(struct netlink *st, struct buffer_if *buf,
401 char *errmsgbuf, int errmsgbuflen)
403 #define BAD(...) do{ \
404 snprintf(errmsgbuf,errmsgbuflen,__VA_ARGS__); \
408 if (buf->size < (int)sizeof(struct iphdr)) BAD("len %"PRIu32"",buf->size);
409 struct iphdr *iph=(struct iphdr *)buf->start;
412 if (iph->ihl < 5) BAD("ihl %u",iph->ihl);
413 if (iph->version != 4) BAD("version %u",iph->version);
414 if (buf->size < iph->ihl*4) BAD("size %"PRId32"<%u*4",buf->size,iph->ihl);
415 if (ip_fast_csum((uint8_t *)iph, iph->ihl)!=0) BAD("csum");
416 len=ntohs(iph->tot_len);
417 /* There should be no padding */
418 if (buf->size!=len) BAD("len %"PRId32"!=%"PRId32,buf->size,len);
419 if (len<(iph->ihl<<2)) BAD("len %"PRId32"<(%u<<2)",len,iph->ihl);
420 /* XXX check that there's no source route specified */
426 /* Deliver a packet _to_ client; used after we have decided
427 * what to do with it (and just to check that the client has
428 * actually registered a delivery function with us). */
429 static void netlink_client_deliver(struct netlink *st,
430 struct netlink_client *client,
431 uint32_t source, uint32_t dest,
432 struct buffer_if *buf)
434 if (!client->deliver) {
436 s=ipaddr_to_string(source);
437 d=ipaddr_to_string(dest);
438 Message(M_ERR,"%s: dropping %s->%s, client not registered\n",
444 client->deliver(client->dst, buf);
448 /* Deliver a packet. "client" is the _origin_ of the packet, not its
449 destination, and is NULL for packets from the host and packets
450 generated internally in secnet. */
451 static void netlink_packet_deliver(struct netlink *st,
452 struct netlink_client *client,
453 struct buffer_if *buf)
455 if (buf->size < (int)sizeof(struct iphdr)) {
456 Message(M_ERR,"%s: trying to deliver a too-short packet"
457 " from %s!\n",st->name, client?client->name:"(local)");
462 struct iphdr *iph=(struct iphdr *)buf->start;
463 uint32_t dest=ntohl(iph->daddr);
464 uint32_t source=ntohl(iph->saddr);
465 uint32_t best_quality;
466 bool_t allow_route=False;
467 bool_t found_allowed=False;
471 BUF_ASSERT_USED(buf);
473 if (dest==st->secnet_address) {
474 Message(M_ERR,"%s: trying to deliver a packet to myself!\n",st->name);
479 /* Packets from the host (client==NULL) may always be routed. Packets
480 from clients with the allow_route option will also be routed. */
481 if (!client || (client && (client->options & OPT_ALLOWROUTE)))
484 /* If !allow_route, we check the routing table anyway, and if
485 there's a suitable route with OPT_ALLOWROUTE set we use it. If
486 there's a suitable route, but none with OPT_ALLOWROUTE set then
487 we generate ICMP 'communication with destination network
488 administratively prohibited'. */
492 for (i=0; i<st->n_clients; i++) {
493 if (st->routes[i]->up &&
494 ipset_contains_addr(st->routes[i]->networks,dest)) {
495 /* It's an available route to the correct destination. But is
496 it better than the one we already have? */
498 /* If we have already found an allowed route then we don't
499 bother looking at routes we're not allowed to use. If
500 we don't yet have an allowed route we'll consider any. */
501 if (!allow_route && found_allowed) {
502 if (!(st->routes[i]->options&OPT_ALLOWROUTE)) continue;
505 if (st->routes[i]->link_quality>best_quality
506 || best_quality==0) {
507 best_quality=st->routes[i]->link_quality;
509 if (st->routes[i]->options&OPT_ALLOWROUTE)
511 /* If quality isn't perfect we may wish to
512 consider kicking the tunnel with a 0-length
513 packet to prompt it to perform a key setup.
514 Then it'll eventually decide it's up or
516 /* If quality is perfect and we're allowed to use the
517 route we don't need to search any more. */
518 if (best_quality>=MAXIMUM_LINK_QUALITY &&
519 (allow_route || found_allowed)) break;
523 if (best_match==-1) {
524 /* The packet's not going down a tunnel. It might (ought to)
526 if (ipset_contains_addr(st->networks,dest)) {
527 st->deliver_to_host(st->dst,buf);
529 BUF_ASSERT_FREE(buf);
532 s=ipaddr_to_string(source);
533 d=ipaddr_to_string(dest);
534 Message(M_DEBUG,"%s: don't know where to deliver packet "
535 "(s=%s, d=%s)\n", st->name, s, d);
537 netlink_icmp_simple(st,buf,ICMP_TYPE_UNREACHABLE,
538 ICMP_CODE_NET_UNREACHABLE, icmp_noinfo);
543 !(st->routes[best_match]->options&OPT_ALLOWROUTE)) {
545 s=ipaddr_to_string(source);
546 d=ipaddr_to_string(dest);
547 /* We have a usable route but aren't allowed to use it.
548 Generate ICMP destination unreachable: communication
549 with destination network administratively prohibited */
550 Message(M_NOTICE,"%s: denied forwarding for packet (s=%s, d=%s)\n",
554 netlink_icmp_simple(st,buf,ICMP_TYPE_UNREACHABLE,
555 ICMP_CODE_NET_PROHIBITED, icmp_noinfo);
558 if (best_quality>0) {
559 /* XXX Fragment if required */
560 netlink_client_deliver(st,st->routes[best_match],
562 BUF_ASSERT_FREE(buf);
564 /* Generate ICMP destination unreachable */
565 netlink_icmp_simple(st,buf,
566 ICMP_TYPE_UNREACHABLE,
567 ICMP_CODE_NET_UNREACHABLE,
573 BUF_ASSERT_FREE(buf);
576 static void netlink_packet_forward(struct netlink *st,
577 struct netlink_client *client,
578 struct buffer_if *buf)
580 if (buf->size < (int)sizeof(struct iphdr)) return;
581 struct iphdr *iph=(struct iphdr *)buf->start;
583 BUF_ASSERT_USED(buf);
585 /* Packet has already been checked */
587 /* Generate ICMP time exceeded */
588 netlink_icmp_simple(st,buf,ICMP_TYPE_TIME_EXCEEDED,
589 ICMP_CODE_TTL_EXCEEDED,icmp_noinfo);
595 iph->check=ip_fast_csum((uint8_t *)iph,iph->ihl);
597 netlink_packet_deliver(st,client,buf);
598 BUF_ASSERT_FREE(buf);
601 /* Deal with packets addressed explicitly to us */
602 static void netlink_packet_local(struct netlink *st,
603 struct netlink_client *client,
604 struct buffer_if *buf)
610 if (buf->size < (int)sizeof(struct icmphdr)) {
611 Message(M_WARNING,"%s: short packet addressed to secnet; "
612 "ignoring it\n",st->name);
616 h=(struct icmphdr *)buf->start;
618 if ((ntohs(h->iph.frag)&(IPHDR_FRAG_OFF|IPHDR_FRAG_MORE))!=0) {
619 Message(M_WARNING,"%s: fragmented packet addressed to secnet; "
620 "ignoring it\n",st->name);
625 if (h->iph.protocol==1) {
627 if (h->type==ICMP_TYPE_ECHO_REQUEST && h->code==0) {
628 /* ICMP echo-request. Special case: we re-use the buffer
629 to construct the reply. */
630 h->type=ICMP_TYPE_ECHO_REPLY;
631 h->iph.daddr=h->iph.saddr;
632 h->iph.saddr=htonl(st->secnet_address);
635 h->iph.check=ip_fast_csum((uint8_t *)h,h->iph.ihl);
636 netlink_icmp_csum(h);
637 netlink_packet_deliver(st,NULL,buf);
640 Message(M_WARNING,"%s: unknown incoming ICMP\n",st->name);
642 /* Send ICMP protocol unreachable */
643 netlink_icmp_simple(st,buf,ICMP_TYPE_UNREACHABLE,
644 ICMP_CODE_PROTOCOL_UNREACHABLE,icmp_noinfo);
652 /* If client==NULL packet is from host, otherwise client specifies which tunnel
/* Handle a packet arriving at the netlink, either from a tunnel (client
   non-NULL) or from the host (client NULL).  A packet rejected by
   netlink_check() is reported as a bad IP packet.  The source address is
   checked against client->networks or against st->networks.  After that
   the packet is passed straight through (point-to-point), given to
   netlink_packet_local() when it is addressed to st->secnet_address, or
   given to netlink_packet_forward(). */
654 static void netlink_incoming(struct netlink *st, struct netlink_client *client,
655 struct buffer_if *buf)
657 uint32_t source,dest;
660 const char *sourcedesc=client?client->name:"host";
662 BUF_ASSERT_USED(buf);
664 if (!netlink_check(st,buf,errmsgbuf,sizeof(errmsgbuf))) {
665 Message(M_WARNING,"%s: bad IP packet from %s: %s\n",
/* NOTE(review): netlink_check() only establishes that the buffer holds
   a struct iphdr, yet this asserts room for a whole struct icmphdr.
   struct icmphdr embeds the IP header as 'iph' and adds the ICMP fields,
   so it looks larger; if so, a valid IP packet with a very short payload
   would abort the process here.  sizeof(struct iphdr) looks like the
   intended bound -- confirm. */
671 assert(buf->size >= (int)sizeof(struct icmphdr));
672 iph=(struct iphdr *)buf->start;
674 source=ntohl(iph->saddr);
675 dest=ntohl(iph->daddr);
677 /* Check source. If we don't like the source, there's no point
678 generating ICMP because we won't know how to get it to the
679 source of the packet. */
681 /* Check that the packet source is appropriate for the tunnel
683 if (!ipset_contains_addr(client->networks,source)) {
685 s=ipaddr_to_string(source);
686 d=ipaddr_to_string(dest);
687 Message(M_WARNING,"%s: packet from tunnel %s with bad "
688 "source address (s=%s,d=%s)\n",st->name,client->name,s,d);
694 /* Check that the packet originates in our configured local
695 network, and hasn't been forwarded from elsewhere or
696 generated with the wrong source address */
697 if (!ipset_contains_addr(st->networks,source)) {
699 s=ipaddr_to_string(source);
700 d=ipaddr_to_string(dest);
701 Message(M_WARNING,"%s: outgoing packet with bad source address "
702 "(s=%s,d=%s)\n",st->name,s,d);
709 /* If this is a point-to-point device we don't examine the
710 destination address at all; we blindly send it down our
711 one-and-only registered tunnel, or to the host, depending on
712 where it came from. It's up to external software to check
713 address validity and generate ICMP, etc. */
716 st->deliver_to_host(st->dst,buf);
718 netlink_client_deliver(st,st->clients,source,dest,buf);
720 BUF_ASSERT_FREE(buf);
724 /* st->secnet_address needs checking before matching destination
726 if (dest==st->secnet_address) {
727 netlink_packet_local(st,client,buf);
728 BUF_ASSERT_FREE(buf);
731 netlink_packet_forward(st,client,buf);
732 BUF_ASSERT_FREE(buf);
735 static void netlink_inst_incoming(void *sst, struct buffer_if *buf)
737 struct netlink_client *c=sst;
738 struct netlink *st=c->nst;
740 netlink_incoming(st,c,buf);
743 static void netlink_dev_incoming(void *sst, struct buffer_if *buf)
745 struct netlink *st=sst;
747 netlink_incoming(st,NULL,buf);
750 static void netlink_set_quality(void *sst, uint32_t quality)
752 struct netlink_client *c=sst;
753 struct netlink *st=c->nst;
755 c->link_quality=quality;
756 c->up=(c->link_quality==LINK_QUALITY_DOWN)?False:True;
757 if (c->options&OPT_SOFTROUTE) {
758 st->set_routes(st->dst,c);
762 static void netlink_output_subnets(struct netlink *st, uint32_t loglevel,
763 struct subnet_list *snets)
768 for (i=0; i<snets->entries; i++) {
769 net=subnet_to_string(snets->list[i]);
770 Message(loglevel,"%s ",net);
775 static void netlink_dump_routes(struct netlink *st, bool_t requested)
781 if (requested) c=M_WARNING;
783 net=ipaddr_to_string(st->secnet_address);
784 Message(c,"%s: point-to-point (remote end is %s); routes: ",
787 netlink_output_subnets(st,c,st->clients->subnets);
790 Message(c,"%s: routing table:\n",st->name);
791 for (i=0; i<st->n_clients; i++) {
792 netlink_output_subnets(st,c,st->routes[i]->subnets);
793 Message(c,"-> tunnel %s (%s,mtu %d,%s routes,%s,"
794 "quality %d,use %d,pri %lu)\n",
796 st->routes[i]->up?"up":"down",
798 st->routes[i]->options&OPT_SOFTROUTE?"soft":"hard",
799 st->routes[i]->options&OPT_ALLOWROUTE?"free":"restricted",
800 st->routes[i]->link_quality,
801 st->routes[i]->outcount,
802 (unsigned long)st->routes[i]->priority);
804 net=ipaddr_to_string(st->secnet_address);
805 Message(c,"%s/32 -> netlink \"%s\" (use %d)\n",
806 net,st->name,st->localcount);
808 for (i=0; i<st->subnets->entries; i++) {
809 net=subnet_to_string(st->subnets->list[i]);
810 Message(c,"%s ",net);
814 Message(c,"-> host (use %d)\n",st->outcount);
818 /* ap is a pointer to a member of the routes array */
819 static int netlink_compare_client_priority(const void *ap, const void *bp)
821 const struct netlink_client *const*a=ap;
822 const struct netlink_client *const*b=bp;
824 if ((*a)->priority==(*b)->priority) return 0;
825 if ((*a)->priority<(*b)->priority) return 1;
829 static void netlink_phase_hook(void *sst, uint32_t new_phase)
831 struct netlink *st=sst;
832 struct netlink_client *c;
835 /* All the networks serviced by the various tunnels should now
836 * have been registered. We build a routing table by sorting the
837 * clients by priority. */
838 st->routes=safe_malloc_ary(sizeof(*st->routes),st->n_clients,
839 "netlink_phase_hook");
842 for (c=st->clients; c; c=c->next) {
846 /* Sort the table in descending order of priority */
847 qsort(st->routes,st->n_clients,sizeof(*st->routes),
848 netlink_compare_client_priority);
850 netlink_dump_routes(st,False);
853 static void netlink_signal_handler(void *sst, int signum)
855 struct netlink *st=sst;
856 Message(M_INFO,"%s: route dump requested by SIGUSR1\n",st->name);
857 netlink_dump_routes(st,True);
860 static void netlink_inst_set_mtu(void *sst, int32_t new_mtu)
862 struct netlink_client *c=sst;
867 static void netlink_inst_reg(void *sst, netlink_deliver_fn *deliver,
870 struct netlink_client *c=sst;
876 static struct flagstr netlink_option_table[]={
877 { "soft", OPT_SOFTROUTE },
878 { "allow-route", OPT_ALLOWROUTE },
881 /* This is the routine that gets called when the closure that's
882 returned by an invocation of a netlink device closure (eg. tun,
883 userv-ipif) is invoked. It's used to create routes and pass in
884 information about them; the closure it returns is used by site
886 static closure_t *netlink_inst_create(struct netlink *st,
887 struct cloc loc, dict_t *dict)
889 struct netlink_client *c;
891 struct ipset *networks;
892 uint32_t options,priority;
896 name=dict_read_string(dict, "name", True, st->name, loc);
898 l=dict_lookup(dict,"routes");
900 cfgfatal(loc,st->name,"required parameter \"routes\" not found\n");
901 networks=string_list_to_ipset(l,loc,st->name,"routes");
902 options=string_list_to_word(dict_lookup(dict,"options"),
903 netlink_option_table,st->name);
905 priority=dict_read_number(dict,"priority",False,st->name,loc,0);
906 mtu=dict_read_number(dict,"mtu",False,st->name,loc,0);
908 if ((options&OPT_SOFTROUTE) && !st->set_routes) {
909 cfgfatal(loc,st->name,"this netlink device does not support "
914 if (options&OPT_SOFTROUTE) {
915 /* XXX for now we assume that soft routes require root privilege;
916 this may not always be true. The device driver can tell us. */
917 require_root_privileges=True;
918 require_root_privileges_explanation="netlink: soft routes";
920 cfgfatal(loc,st->name,"point-to-point netlinks do not support "
926 /* Check that nets are a subset of st->remote_networks;
927 refuse to register if they are not. */
928 if (!ipset_is_subset(st->remote_networks,networks)) {
929 cfgfatal(loc,st->name,"routes are not allowed\n");
933 c=safe_malloc(sizeof(*c),"netlink_inst_create");
934 c->cl.description=name;
935 c->cl.type=CL_NETLINK;
937 c->cl.interface=&c->ops;
939 c->ops.reg=netlink_inst_reg;
940 c->ops.deliver=netlink_inst_incoming;
941 c->ops.set_quality=netlink_set_quality;
942 c->ops.set_mtu=netlink_inst_set_mtu;
945 c->networks=networks;
946 c->subnets=ipset_to_subnet_list(networks);
947 c->priority=priority;
951 c->link_quality=LINK_QUALITY_UNUSED;
952 c->mtu=mtu?mtu:st->mtu;
959 assert(st->n_clients < INT_MAX);
965 static list_t *netlink_inst_apply(closure_t *self, struct cloc loc,
966 dict_t *context, list_t *args)
968 struct netlink *st=self->interface;
974 item=list_elem(args,0);
975 if (!item || item->type!=t_dict) {
976 cfgfatal(loc,st->name,"must have a dictionary argument\n");
978 dict=item->data.dict;
980 cl=netlink_inst_create(st,loc,dict);
982 return new_closure(cl);
985 netlink_deliver_fn *netlink_init(struct netlink *st,
986 void *dst, struct cloc loc,
987 dict_t *dict, cstring_t description,
988 netlink_route_fn *set_routes,
989 netlink_deliver_fn *to_host)
995 st->cl.description=description;
997 st->cl.apply=netlink_inst_apply;
1002 st->set_routes=set_routes;
1003 st->deliver_to_host=to_host;
1005 st->name=dict_read_string(dict,"name",False,description,loc);
1006 if (!st->name) st->name=description;
1007 l=dict_lookup(dict,"networks");
1009 st->networks=string_list_to_ipset(l,loc,st->name,"networks");
1011 struct ipset *empty;
1013 st->networks=ipset_complement(empty);
1016 l=dict_lookup(dict,"remote-networks");
1018 st->remote_networks=string_list_to_ipset(l,loc,st->name,
1021 struct ipset *empty;
1023 st->remote_networks=ipset_complement(empty);
1027 sa=dict_find_item(dict,"secnet-address",False,"netlink",loc);
1028 ptpa=dict_find_item(dict,"ptp-address",False,"netlink",loc);
1030 cfgfatal(loc,st->name,"you may not specify secnet-address and "
1031 "ptp-address in the same netlink device\n");
1033 if (!(sa || ptpa)) {
1034 cfgfatal(loc,st->name,"you must specify secnet-address or "
1035 "ptp-address for this netlink device\n");
1038 st->secnet_address=string_item_to_ipaddr(sa,"netlink");
1041 st->secnet_address=string_item_to_ipaddr(ptpa,"netlink");
1044 /* To be strictly correct we could subtract secnet_address from
1045 networks here. It shouldn't make any practical difference,
1046 though, and will make the route dump look complicated... */
1047 st->subnets=ipset_to_subnet_list(st->networks);
1048 st->mtu=dict_read_number(dict, "mtu", False, "netlink", loc, DEFAULT_MTU);
1049 buffer_new(&st->icmp,ICMP_BUFSIZE);
1053 add_hook(PHASE_SETUP,netlink_phase_hook,st);
1054 request_signal_notification(SIGUSR1, netlink_signal_handler, st);
1056 /* If we're point-to-point then we return a CL_NETLINK directly,
1057 rather than a CL_NETLINK_OLD or pure closure (depending on
1058 compatibility). This CL_NETLINK is for our one and only
1059 client. Our cl.apply function is NULL. */
1062 cl=netlink_inst_create(st,loc,dict);
1065 return netlink_dev_incoming;
1068 /* No connection to the kernel at all... */
1074 static bool_t null_set_route(void *sst, struct netlink_client *routes)
1076 struct null *st=sst;
1078 if (routes->up!=routes->kup) {
1079 Message(M_INFO,"%s: setting routes for tunnel %s to state %s\n",
1080 st->nl.name,routes->name,
1081 routes->up?"up":"down");
1082 routes->kup=routes->up;
1088 static void null_deliver(void *sst, struct buffer_if *buf)
1093 static list_t *null_apply(closure_t *self, struct cloc loc, dict_t *context,
1100 st=safe_malloc(sizeof(*st),"null_apply");
1102 item=list_elem(args,0);
1103 if (!item || item->type!=t_dict)
1104 cfgfatal(loc,"null-netlink","parameter must be a dictionary\n");
1106 dict=item->data.dict;
1108 netlink_init(&st->nl,st,loc,dict,"null-netlink",null_set_route,
1111 return new_closure(&st->nl.cl);
1114 void netlink_module(dict_t *dict)
1116 add_closure(dict,"null-netlink",null_apply);