1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2013 Tom Gundersen <teg@jklm.no>
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/socket.h>
31 #include "rtnl-internal.h"
32 #include "rtnl-util.h"
/* Allocate a new sd_rtnl object with new0() and set up its defaults.
 *
 * Visible initialisation: the reference count is set to REFCNT_INIT, the
 * socket address family to AF_NETLINK, the creating PID is recorded, the
 * match-callback list head is initialised, the write queue is sized for one
 * entry and the read buffer for sizeof(struct nlmsghdr) bytes.
 *
 * ret must not be NULL (assert_return yields -EINVAL otherwise).
 *
 * NOTE(review): the error returns after the allocations, the serial number
 * assignment and the hand-over of the object to *ret are not part of this
 * excerpt. */
34 static int sd_rtnl_new(sd_rtnl **ret) {
35 _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
37 assert_return(ret, -EINVAL);
39 rtnl = new0(sd_rtnl, 1);
43 rtnl->n_ref = REFCNT_INIT;
47 rtnl->sockaddr.nl.nl_family = AF_NETLINK;
/* Remembered so that rtnl_pid_changed() can detect use from another process. */
49 rtnl->original_pid = getpid();
51 LIST_HEAD_INIT(rtnl->match_callbacks);
53 /* We guarantee that wqueue always has space for at least
55 if (!GREEDY_REALLOC(rtnl->wqueue, rtnl->wqueue_allocated, 1))
58 /* We guarantee that the read buffer has at least space for
60 if (!greedy_realloc((void**)&rtnl->rbuffer, &rtnl->rbuffer_allocated,
61 sizeof(struct nlmsghdr), sizeof(uint8_t)))
64 /* Change notification responses have sequence 0, so we must
65 * start our request sequence numbers at 1, or we may confuse our
66 * responses with notifications from the kernel */
/* Build an sd_rtnl object for an already existing netlink socket fd.
 *
 * A fresh object is created with sd_rtnl_new(); getsockname() then stores the
 * socket's current address in rtnl->sockaddr.
 *
 * ret must not be NULL (-EINVAL otherwise).
 *
 * NOTE(review): error handling after each call and the assignment of fd and
 * *ret are not visible in this excerpt. */
75 int sd_rtnl_new_from_netlink(sd_rtnl **ret, int fd) {
76 _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
80 assert_return(ret, -EINVAL);
82 r = sd_rtnl_new(&rtnl);
/* In/out length argument for getsockname(). */
86 addrlen = sizeof(rtnl->sockaddr);
88 r = getsockname(fd, &rtnl->sockaddr.sa, &addrlen);
/* Return true when the calling process is not the one that created rtnl,
 * i.e. the PID recorded in rtnl->original_pid differs from getpid(). */
100 static bool rtnl_pid_changed(sd_rtnl *rtnl) {
103 /* We don't support people creating an rtnl connection and
104 * keeping it around over a fork(). Let's complain. */
106 return rtnl->original_pid != getpid();
/* Set up an sd_rtnl object on top of the caller-supplied netlink socket fd.
 *
 * Steps visible here: create the object with sd_rtnl_new(), set the
 * SO_PASSCRED and NETLINK_PKTINFO socket options, bind() the socket to
 * rtnl->sockaddr (an EINVAL failure is tolerated so that an already bound
 * socket can be used), and read the resulting address back with getsockname().
 *
 * ret must not be NULL and fd must be >= 0 (-EINVAL otherwise).
 *
 * NOTE(review): the declaration of `one`, the error returns and the final
 * assignment of fd and *ret are not visible in this excerpt. */
109 int sd_rtnl_open_fd(sd_rtnl **ret, int fd) {
110 _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
114 assert_return(ret, -EINVAL);
115 assert_return(fd >= 0, -EINVAL);
117 r = sd_rtnl_new(&rtnl);
121 r = setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
125 r = setsockopt(fd, SOL_NETLINK, NETLINK_PKTINFO, &one, sizeof(one));
129 addrlen = sizeof(rtnl->sockaddr);
131 r = bind(fd, &rtnl->sockaddr.sa, addrlen);
132 /* ignore EINVAL to allow opening an already bound socket */
133 if (r < 0 && errno != EINVAL)
/* Fetch the address the socket actually ended up with. */
136 r = getsockname(fd, &rtnl->sockaddr.sa, &addrlen);
/* Open a new NETLINK_ROUTE raw socket (close-on-exec, non-blocking) and wrap
 * it in an sd_rtnl object by calling sd_rtnl_open_fd().
 *
 * NOTE(review): fd carries the _cleanup_close_ attribute; how ownership is
 * handed over after a successful sd_rtnl_open_fd() is not visible here. */
148 int sd_rtnl_open(sd_rtnl **ret) {
149 _cleanup_close_ int fd = -1;
152 fd = socket(PF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, NETLINK_ROUTE);
156 r = sd_rtnl_open_fd(ret, fd);
/* Subscribe the socket of rtnl to the netlink multicast group `group` using
 * the NETLINK_ADD_MEMBERSHIP socket option. The socket must already be open
 * (asserted via rtnl->fd >= 0).
 *
 * NOTE(review): the handling of the setsockopt() result is not visible in
 * this excerpt. */
165 static int rtnl_join_broadcast_group(sd_rtnl *rtnl, unsigned group) {
169 assert(rtnl->fd >= 0);
172 r = setsockopt(rtnl->fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &group, sizeof(group));
/* Pass the socket of rtnl and the requested size on to fd_inc_rcvbuf() and
 * return its result unchanged. rtnl is dereferenced without a NULL check in
 * the visible code. */
179 int sd_rtnl_inc_rcvbuf(const sd_rtnl *const rtnl, const int size) {
180 return fd_inc_rcvbuf(rtnl->fd, size);
/* Take an additional reference on rtnl.
 *
 * Yields NULL via assert_return when rtnl is NULL or when the object is used
 * from a process other than its creator. The incremented count is asserted to
 * be at least 2.
 *
 * NOTE(review): the return statement is not visible in this excerpt. */
183 sd_rtnl *sd_rtnl_ref(sd_rtnl *rtnl) {
184 assert_return(rtnl, NULL);
185 assert_return(!rtnl_pid_changed(rtnl), NULL);
188 assert_se(REFCNT_INC(rtnl->n_ref) >= 2);
/* Drop one reference on rtnl; when REFCNT_DEC() reports 0 the object is torn
 * down.
 *
 * Visible teardown: every message still held in rqueue, rqueue_partial and
 * wqueue is unreferenced, the reply-callback hashmap and priority queue are
 * freed, the three event sources and the event loop reference are dropped,
 * all match callbacks are unlinked from the list, and the socket is closed.
 *
 * Yields NULL via assert_return when called from a process other than the
 * creator.
 *
 * NOTE(review): the NULL check on rtnl, the freeing of the remaining buffers
 * and of the object itself, and the return value are not visible here. */
193 sd_rtnl *sd_rtnl_unref(sd_rtnl *rtnl) {
197 assert_return(!rtnl_pid_changed(rtnl), NULL);
199 if (REFCNT_DEC(rtnl->n_ref) == 0) {
200 struct match_callback *f;
/* Release messages that were received but never dispatched. */
203 for (i = 0; i < rtnl->rqueue_size; i++)
204 sd_rtnl_message_unref(rtnl->rqueue[i]);
/* Same for the queue of partially received messages. */
207 for (i = 0; i < rtnl->rqueue_partial_size; i++)
208 sd_rtnl_message_unref(rtnl->rqueue_partial[i]);
209 free(rtnl->rqueue_partial);
/* Release messages that were queued for sending but never written. */
211 for (i = 0; i < rtnl->wqueue_size; i++)
212 sd_rtnl_message_unref(rtnl->wqueue[i]);
217 hashmap_free_free(rtnl->reply_callbacks);
218 prioq_free(rtnl->reply_callbacks_prioq);
220 sd_event_source_unref(rtnl->io_event_source);
221 sd_event_source_unref(rtnl->time_event_source);
222 sd_event_source_unref(rtnl->exit_event_source);
223 sd_event_unref(rtnl->event);
/* Unlink match callbacks one by one until the list is empty. */
225 while ((f = rtnl->match_callbacks)) {
226 LIST_REMOVE(match_callbacks, rtnl->match_callbacks, f);
230 safe_close(rtnl->fd);
/* Stamp message m with the next request serial number of rtnl and seal it
 * with rtnl_message_seal().
 *
 * The serial is taken with the GNU `a ?: b` form: the current counter value
 * is used and post-incremented; if that value is 0 (e.g. after wrap-around)
 * the counter is read and incremented once more so that 0 is never assigned. */
237 static void rtnl_seal_message(sd_rtnl *rtnl, sd_rtnl_message *m) {
239 assert(!rtnl_pid_changed(rtnl));
243 /* don't use seq == 0, as that is used for broadcasts, so we
244 would get confused by replies to such messages */
245 m->hdr->nlmsg_seq = rtnl->serial++ ? : rtnl->serial++;
247 rtnl_message_seal(m);
/* Seal message and send it on nl, or queue it for later sending.
 *
 * With an empty write queue the message is written right away using
 * socket_write_message(); according to the comments, a message that could not
 * be sent is stored in wqueue[0] with an extra reference. With a non-empty
 * write queue the message is appended, unless the queue already holds
 * RTNL_WQUEUE_MAX entries, in which case a debug message is logged. The
 * serial number of the message is stored through `serial`.
 *
 * Precondition failures reported by assert_return: -EINVAL for a NULL nl or
 * message, -ECHILD when used from another process, -EPERM when the message is
 * already sealed.
 *
 * NOTE(review): the third parameter's declaration, the branch conditions
 * around the queueing and all return statements are not visible here. */
252 int sd_rtnl_send(sd_rtnl *nl,
253 sd_rtnl_message *message,
257 assert_return(nl, -EINVAL);
258 assert_return(!rtnl_pid_changed(nl), -ECHILD);
259 assert_return(message, -EINVAL);
260 assert_return(!message->sealed, -EPERM);
/* Assigns the serial number and marks the message as sealed. */
262 rtnl_seal_message(nl, message);
264 if (nl->wqueue_size <= 0) {
266 r = socket_write_message(nl, message);
270 /* nothing was sent, so let's put it on
272 nl->wqueue[0] = sd_rtnl_message_ref(message);
276 /* append to queue */
277 if (nl->wqueue_size >= RTNL_WQUEUE_MAX) {
278 log_debug("rtnl: exhausted the write queue size (%d)", RTNL_WQUEUE_MAX);
282 if (!GREEDY_REALLOC(nl->wqueue, nl->wqueue_allocated, nl->wqueue_size + 1))
285 nl->wqueue[nl->wqueue_size ++] = sd_rtnl_message_ref(message);
289 *serial = rtnl_message_get_serial(message);
/* Make sure the read queue of rtnl can take one more message.
 *
 * When the queue already holds RTNL_RQUEUE_MAX entries a debug message is
 * logged; otherwise the array is grown with GREEDY_REALLOC() to hold
 * rqueue_size + 1 entries.
 *
 * NOTE(review): the return values for the limit, allocation-failure and
 * success cases are not visible in this excerpt. */
294 int rtnl_rqueue_make_room(sd_rtnl *rtnl) {
297 if (rtnl->rqueue_size >= RTNL_RQUEUE_MAX) {
298 log_debug("rtnl: exhausted the read queue size (%d)", RTNL_RQUEUE_MAX);
302 if (!GREEDY_REALLOC(rtnl->rqueue, rtnl->rqueue_allocated, rtnl->rqueue_size + 1))
/* Make sure the queue of partially received messages can take one more entry.
 *
 * The same limit as for the regular read queue (RTNL_RQUEUE_MAX) applies; on
 * reaching it a debug message is logged. Otherwise the array is grown with
 * GREEDY_REALLOC() to hold rqueue_partial_size + 1 entries.
 *
 * NOTE(review): the return values are not visible in this excerpt. */
308 int rtnl_rqueue_partial_make_room(sd_rtnl *rtnl) {
311 if (rtnl->rqueue_partial_size >= RTNL_RQUEUE_MAX) {
312 log_debug("rtnl: exhausted the partial read queue size (%d)", RTNL_RQUEUE_MAX);
316 if (!GREEDY_REALLOC(rtnl->rqueue_partial, rtnl->rqueue_partial_allocated,
317 rtnl->rqueue_partial_size + 1))
/* Hand the oldest queued incoming message to the caller through *message.
 *
 * If the read queue is empty, socket_read_message() is called first to try to
 * receive something. The first queue entry is then stored in *message, the
 * queue size is decremented and the remaining entries are moved down by one
 * slot with memmove(). No unref is performed here, so the queue's reference
 * appears to pass to the caller.
 *
 * NOTE(review): the handling of the socket_read_message() result and the
 * return value are not visible in this excerpt. */
323 static int dispatch_rqueue(sd_rtnl *rtnl, sd_rtnl_message **message) {
329 if (rtnl->rqueue_size <= 0) {
330 /* Try to read a new message */
331 r = socket_read_message(rtnl);
336 /* Dispatch a queued message */
337 *message = rtnl->rqueue[0];
338 rtnl->rqueue_size --;
339 memmove(rtnl->rqueue, rtnl->rqueue + 1, sizeof(sd_rtnl_message*) * rtnl->rqueue_size);
/* Write out queued outgoing messages, oldest first.
 *
 * While the write queue is non-empty, the first entry is passed to
 * socket_write_message(). After a message has been handled its queue
 * reference is dropped, the size is decremented and the remaining entries are
 * moved down by one slot with memmove().
 *
 * NOTE(review): the checks on r (error, and the "didn't do anything" case
 * mentioned in the comment) and the return value are not visible here. */
344 static int dispatch_wqueue(sd_rtnl *rtnl) {
349 while (rtnl->wqueue_size > 0) {
350 r = socket_write_message(rtnl, rtnl->wqueue[0]);
354 /* Didn't do anything this time */
357 /* see equivalent in sd-bus.c */
358 sd_rtnl_message_unref(rtnl->wqueue[0]);
359 rtnl->wqueue_size --;
360 memmove(rtnl->wqueue, rtnl->wqueue + 1, sizeof(sd_rtnl_message*) * rtnl->wqueue_size);
/* Deliver a timeout to the reply callback at the head of the timeout
 * priority queue.
 *
 * The head entry is obtained with prioq_peek() and the current
 * CLOCK_MONOTONIC time is read. A synthetic error message carrying -ETIMEDOUT
 * and the callback's serial is created, the entry is removed from both the
 * priority queue and the reply-callback hashmap, and its callback is invoked
 * with that message. A callback failure is logged at debug level.
 *
 * NOTE(review): the checks that decide whether the head entry has actually
 * expired, the freeing of c and the return value are not visible here. */
369 static int process_timeout(sd_rtnl *rtnl) {
370 _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
371 struct reply_callback *c;
377 c = prioq_peek(rtnl->reply_callbacks_prioq);
381 n = now(CLOCK_MONOTONIC);
385 r = rtnl_message_new_synthetic_error(-ETIMEDOUT, c->serial, &m);
/* The entry peeked above must still be the head of the queue. */
389 assert_se(prioq_pop(rtnl->reply_callbacks_prioq) == c);
390 hashmap_remove(rtnl->reply_callbacks, &c->serial);
392 r = c->callback(rtnl, m, c->userdata);
394 log_debug_errno(r, "sd-rtnl: timedout callback failed: %m");
/* Route an incoming reply m to the callback registered for its serial.
 *
 * The callback entry is removed from the reply-callback hashmap (keyed by the
 * message serial) and from the timeout priority queue, the message type is
 * queried, and the callback is invoked with the message. A callback failure
 * is logged at debug level. c is declared _cleanup_free_.
 *
 * NOTE(review): the handling of a missing entry, the condition guarding
 * prioq_remove(), what happens for NLMSG_DONE and the return value are not
 * visible in this excerpt. */
401 static int process_reply(sd_rtnl *rtnl, sd_rtnl_message *m) {
402 _cleanup_free_ struct reply_callback *c = NULL;
410 serial = rtnl_message_get_serial(m);
411 c = hashmap_remove(rtnl->reply_callbacks, &serial);
416 prioq_remove(rtnl->reply_callbacks_prioq, c, &c->prioq_idx);
418 r = sd_rtnl_message_get_type(m, &type);
422 if (type == NLMSG_DONE)
425 r = c->callback(rtnl, m, c->userdata);
427 log_debug_errno(r, "sd-rtnl: callback failed: %m");
/* Offer message m to the registered match callbacks.
 *
 * The message type is queried once; the match-callback list is then walked
 * and the callback of every entry whose type equals the message type is
 * invoked. A callback failure is logged at debug level.
 *
 * NOTE(review): whether iteration stops after a callback result, and the
 * return value, are not visible in this excerpt. */
432 static int process_match(sd_rtnl *rtnl, sd_rtnl_message *m) {
433 struct match_callback *c;
440 r = sd_rtnl_message_get_type(m, &type);
444 LIST_FOREACH(match_callbacks, c, rtnl->match_callbacks) {
445 if (type == c->type) {
446 r = c->callback(rtnl, m, c->userdata);
449 log_debug_errno(r, "sd-rtnl: match callback failed: %m");
/* Perform one processing step on rtnl.
 *
 * Order of operations visible here: handle reply timeouts, write out the
 * write queue, fetch one incoming message, then route it: broadcast messages
 * go to process_match(), the branch for other messages is process_reply().
 *
 * NOTE(review): the checks between the steps (early exits), the use of `ret`
 * and the return value are not visible in this excerpt. */
459 static int process_running(sd_rtnl *rtnl, sd_rtnl_message **ret) {
460 _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
465 r = process_timeout(rtnl);
469 r = dispatch_wqueue(rtnl);
473 r = dispatch_rqueue(rtnl, &m);
479 if (sd_rtnl_message_is_broadcast(m)) {
480 r = process_match(rtnl, m);
484 r = process_reply(rtnl, m);
/* Public entry point for one processing step; wraps process_running().
 *
 * The `processing` flag is set for the duration of the call; a nested call
 * while it is set is rejected with -EBUSY. Further precondition failures:
 * -EINVAL for a NULL rtnl, -ECHILD when used from another process.
 *
 * NOTE(review): RTNL_DONT_DESTROY() is defined elsewhere; presumably it keeps
 * the object alive while callbacks run - confirm against its definition. The
 * return statement is not visible in this excerpt. */
505 int sd_rtnl_process(sd_rtnl *rtnl, sd_rtnl_message **ret) {
506 RTNL_DONT_DESTROY(rtnl);
509 assert_return(rtnl, -EINVAL);
510 assert_return(!rtnl_pid_changed(rtnl), -ECHILD);
511 assert_return(!rtnl->processing, -EBUSY);
513 rtnl->processing = true;
514 r = process_running(rtnl, ret);
515 rtnl->processing = false;
/* Turn the relative timeout usec into an absolute CLOCK_MONOTONIC deadline
 * (current time plus usec).
 *
 * NOTE(review): (uint64_t) -1 is special-cased and RTNL_DEFAULT_TIMEOUT is
 * substituted under some condition, but the statement following the first
 * check and the condition guarding the substitution are not visible here. */
520 static usec_t calc_elapse(uint64_t usec) {
521 if (usec == (uint64_t) -1)
525 usec = RTNL_DEFAULT_TIMEOUT;
527 return now(CLOCK_MONOTONIC) + usec;
/* Wait on the socket of rtnl with ppoll().
 *
 * The wait time m starts out as USEC_INFINITY. In the branch that consults
 * sd_rtnl_get_timeout(), the absolute deadline is converted to a relative
 * time, clamped to 0 when it already lies in the past. A caller-supplied
 * timeout_usec other than (uint64_t) -1 is considered when m is infinite or
 * timeout_usec is smaller than m. ppoll() receives a NULL timespec for an
 * infinite wait.
 *
 * Returns 1 when ppoll() reported at least one ready descriptor, 0 otherwise.
 *
 * NOTE(review): how need_more selects between the two commented cases, the
 * set-up of p[0], the body of the timeout_usec check and the error returns
 * are not visible in this excerpt. */
530 static int rtnl_poll(sd_rtnl *rtnl, bool need_more, uint64_t timeout_usec) {
531 struct pollfd p[1] = {};
533 usec_t m = USEC_INFINITY;
538 e = sd_rtnl_get_events(rtnl);
543 /* Caller wants more data, and doesn't care about
544 * what's been read or any other timeouts. */
548 /* Caller wants to process if there is something to
549 * process, but doesn't care otherwise */
551 r = sd_rtnl_get_timeout(rtnl, &until);
556 nw = now(CLOCK_MONOTONIC);
/* Relative time left until the deadline, never negative. */
557 m = until > nw ? until - nw : 0;
561 if (timeout_usec != (uint64_t) -1 && (m == (uint64_t) -1 || timeout_usec < m))
567 r = ppoll(p, 1, m == (uint64_t) -1 ? NULL : timespec_store(&ts, m), NULL);
571 return r > 0 ? 1 : 0;
/* Wait until there is something to process on nl or timeout_usec elapses.
 *
 * A non-empty read queue is checked first; otherwise the call goes to
 * rtnl_poll() with need_more set to false and its result is returned.
 * Precondition failures: -EINVAL for a NULL nl, -ECHILD when used from
 * another process.
 *
 * NOTE(review): the value returned when the read queue is non-empty is not
 * visible in this excerpt. */
574 int sd_rtnl_wait(sd_rtnl *nl, uint64_t timeout_usec) {
575 assert_return(nl, -EINVAL);
576 assert_return(!rtnl_pid_changed(nl), -ECHILD);
578 if (nl->rqueue_size > 0)
581 return rtnl_poll(nl, false, timeout_usec);
/* Comparison function for two struct reply_callback entries; it is the
 * function handed to prioq_ensure_allocated() in sd_rtnl_call_async().
 *
 * Entries are first distinguished by whether their timeout is zero or
 * non-zero, then by the numeric order of the timeout values.
 *
 * NOTE(review): the value returned by each branch is not visible in this
 * excerpt, so the resulting order cannot be stated from here. */
584 static int timeout_compare(const void *a, const void *b) {
585 const struct reply_callback *x = a, *y = b;
587 if (x->timeout != 0 && y->timeout == 0)
590 if (x->timeout == 0 && y->timeout != 0)
593 if (x->timeout < y->timeout)
596 if (x->timeout > y->timeout)
/* Send message m and register callback to be invoked for its reply.
 *
 * Visible steps: make sure the reply-callback hashmap exists; unless usec is
 * (uint64_t) -1 also make sure the timeout priority queue exists; allocate a
 * reply_callback entry and fill in callback, userdata and the deadline from
 * calc_elapse(usec); send the message with sd_rtnl_send(); store the entry in
 * the hashmap keyed by its serial; if the entry has a non-zero timeout, add
 * it to the priority queue. sd_rtnl_call_async_cancel() is called for the
 * entry's serial in what appears to be the prioq_put() failure path.
 *
 * Precondition failures: -EINVAL for a NULL nl, m or callback, -ECHILD when
 * used from another process.
 *
 * NOTE(review): some parameter declarations, the error checks, the
 * assignment of c->serial and the return value are not visible here. */
602 int sd_rtnl_call_async(sd_rtnl *nl,
604 sd_rtnl_message_handler_t callback,
608 struct reply_callback *c;
612 assert_return(nl, -EINVAL);
613 assert_return(m, -EINVAL);
614 assert_return(callback, -EINVAL);
615 assert_return(!rtnl_pid_changed(nl), -ECHILD);
617 r = hashmap_ensure_allocated(&nl->reply_callbacks, &uint64_hash_ops);
/* The priority queue is only needed when a timeout was requested. */
621 if (usec != (uint64_t) -1) {
622 r = prioq_ensure_allocated(&nl->reply_callbacks_prioq, timeout_compare);
627 c = new0(struct reply_callback, 1);
631 c->callback = callback;
632 c->userdata = userdata;
633 c->timeout = calc_elapse(usec);
635 k = sd_rtnl_send(nl, m, &s);
643 r = hashmap_put(nl->reply_callbacks, &c->serial, c);
649 if (c->timeout != 0) {
650 r = prioq_put(nl->reply_callbacks_prioq, c, &c->prioq_idx);
653 sd_rtnl_call_async_cancel(nl, c->serial);
/* Cancel the pending reply callback registered for serial.
 *
 * The entry is removed from the reply-callback hashmap and from the timeout
 * priority queue. Precondition failures: -EINVAL for a NULL nl or a serial of
 * 0, -ECHILD when used from another process.
 *
 * NOTE(review): the declaration of the lookup key `s`, the handling of a
 * missing entry, the condition guarding prioq_remove(), the freeing of c and
 * the return value are not visible in this excerpt. */
664 int sd_rtnl_call_async_cancel(sd_rtnl *nl, uint32_t serial) {
665 struct reply_callback *c;
668 assert_return(nl, -EINVAL);
669 assert_return(serial != 0, -EINVAL);
670 assert_return(!rtnl_pid_changed(nl), -ECHILD);
672 c = hashmap_remove(nl->reply_callbacks, &s);
677 prioq_remove(nl->reply_callbacks_prioq, c, &c->prioq_idx);
/* Send message and wait synchronously for the reply with the same serial.
 *
 * Visible flow: the message is sent with sd_rtnl_send() and a deadline is
 * computed with calc_elapse(usec). The read queue is then scanned for an
 * entry whose serial matches; a match is taken out of the queue (the entries
 * behind it are moved down with memmove()) and examined with
 * sd_rtnl_message_get_errno() and sd_rtnl_message_get_type(). Without a
 * match, socket_read_message() is tried; the code further down reads the
 * clock, waits in rtnl_poll() with need_more set to true, and calls
 * dispatch_wqueue().
 *
 * Precondition failures: -EINVAL for a NULL rtnl or message, -ECHILD when
 * used from another process.
 *
 * NOTE(review): the enclosing loop, the queue-size update, the timeout
 * arithmetic, the handling of NLMSG_DONE, the use of `ret` and every return
 * statement are not visible in this excerpt. */
683 int sd_rtnl_call(sd_rtnl *rtnl,
684 sd_rtnl_message *message,
686 sd_rtnl_message **ret) {
691 assert_return(rtnl, -EINVAL);
692 assert_return(!rtnl_pid_changed(rtnl), -ECHILD);
693 assert_return(message, -EINVAL);
695 r = sd_rtnl_send(rtnl, message, &serial);
699 timeout = calc_elapse(usec);
/* Look for the reply among the messages that have already been received. */
705 for (i = 0; i < rtnl->rqueue_size; i++) {
706 uint32_t received_serial;
708 received_serial = rtnl_message_get_serial(rtnl->rqueue[i]);
710 if (received_serial == serial) {
711 _cleanup_rtnl_message_unref_ sd_rtnl_message *incoming = NULL;
714 incoming = rtnl->rqueue[i];
716 /* found a match, remove from rqueue and return it */
717 memmove(rtnl->rqueue + i,rtnl->rqueue + i + 1,
718 sizeof(sd_rtnl_message*) * (rtnl->rqueue_size - i - 1));
721 r = sd_rtnl_message_get_errno(incoming);
725 r = sd_rtnl_message_get_type(incoming, &type);
729 if (type == NLMSG_DONE) {
743 r = socket_read_message(rtnl);
747 /* received message, so try to process straight away */
753 n = now(CLOCK_MONOTONIC);
759 left = (uint64_t) -1;
761 r = rtnl_poll(rtnl, true, left);
767 r = dispatch_wqueue(rtnl);
/* Write out the pending outgoing messages of rtnl.
 *
 * The write queue size is checked before and after dispatch_wqueue(); the
 * code that follows waits in rtnl_poll() without a caller timeout
 * ((uint64_t) -1). Precondition failures: -EINVAL for a NULL rtnl, -ECHILD
 * when used from another process.
 *
 * NOTE(review): the enclosing loop and the return statements are not visible
 * in this excerpt. */
773 int sd_rtnl_flush(sd_rtnl *rtnl) {
776 assert_return(rtnl, -EINVAL);
777 assert_return(!rtnl_pid_changed(rtnl), -ECHILD);
779 if (rtnl->wqueue_size <= 0)
783 r = dispatch_wqueue(rtnl);
787 if (rtnl->wqueue_size <= 0)
790 r = rtnl_poll(rtnl, false, (uint64_t) -1);
/* Compute the I/O events rtnl is currently interested in.
 *
 * The result depends on two conditions: whether the read queue is empty and
 * whether the write queue is non-empty. Precondition failures: -EINVAL for a
 * NULL rtnl, -ECHILD when used from another process.
 *
 * NOTE(review): the flags set by each condition (presumably POLLIN and
 * POLLOUT - confirm) and the return statement are not visible here. */
796 int sd_rtnl_get_events(sd_rtnl *rtnl) {
799 assert_return(rtnl, -EINVAL);
800 assert_return(!rtnl_pid_changed(rtnl), -ECHILD);
802 if (rtnl->rqueue_size <= 0)
804 if (rtnl->wqueue_size > 0)
/* Report through *timeout_usec when rtnl next needs to be processed.
 *
 * A non-empty read queue is handled first. Otherwise the head of the reply
 * callback priority queue is examined: *timeout_usec is set either to
 * (uint64_t) -1 or to the timeout stored in that head entry.
 *
 * Precondition failures: -EINVAL for a NULL rtnl or timeout_usec, -ECHILD
 * when used from another process.
 *
 * NOTE(review): the value stored for a non-empty read queue, the condition
 * selecting (uint64_t) -1 (presumably an empty priority queue - confirm) and
 * the return values are not visible in this excerpt. */
810 int sd_rtnl_get_timeout(sd_rtnl *rtnl, uint64_t *timeout_usec) {
811 struct reply_callback *c;
813 assert_return(rtnl, -EINVAL);
814 assert_return(timeout_usec, -EINVAL);
815 assert_return(!rtnl_pid_changed(rtnl), -ECHILD);
817 if (rtnl->rqueue_size > 0) {
822 c = prioq_peek(rtnl->reply_callbacks_prioq);
824 *timeout_usec = (uint64_t) -1;
828 *timeout_usec = c->timeout;
/* I/O event handler registered in sd_rtnl_attach_event(): runs one processing
 * step on the sd_rtnl object passed as userdata, discarding any returned
 * message (NULL is passed for ret).
 *
 * NOTE(review): the handling of r and the return value are not visible. */
833 static int io_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
834 sd_rtnl *rtnl = userdata;
839 r = sd_rtnl_process(rtnl, NULL);
/* Timer event handler registered in sd_rtnl_attach_event(): runs one
 * processing step on the sd_rtnl object passed as userdata, discarding any
 * returned message (NULL is passed for ret).
 *
 * NOTE(review): the handling of r and the return value are not visible. */
846 static int time_callback(sd_event_source *s, uint64_t usec, void *userdata) {
847 sd_rtnl *rtnl = userdata;
852 r = sd_rtnl_process(rtnl, NULL);
/* Prepare handler installed on the I/O event source in
 * sd_rtnl_attach_event().
 *
 * It sets the I/O source's event mask to the value of sd_rtnl_get_events(),
 * then queries sd_rtnl_get_timeout(), programs the timer source with the
 * returned deadline, and enables the timer source when r > 0.
 *
 * NOTE(review): the error checks, the condition under which the timer is
 * programmed and the return value are not visible in this excerpt. */
859 static int prepare_callback(sd_event_source *s, void *userdata) {
860 sd_rtnl *rtnl = userdata;
867 e = sd_rtnl_get_events(rtnl);
871 r = sd_event_source_set_io_events(rtnl->io_event_source, e);
875 r = sd_rtnl_get_timeout(rtnl, &until);
881 j = sd_event_source_set_time(rtnl->time_event_source, until);
886 r = sd_event_source_set_enabled(rtnl->time_event_source, r > 0);
/* Exit event handler registered in sd_rtnl_attach_event(); receives the
 * sd_rtnl object as userdata.
 *
 * NOTE(review): the body beyond the userdata assignment is not visible in
 * this excerpt, so what is done at event-loop exit cannot be stated here. */
893 static int exit_callback(sd_event_source *event, void *userdata) {
894 sd_rtnl *rtnl = userdata;
/* Attach rtnl to an sd-event loop so that it is processed automatically.
 *
 * rtnl->event is set either to a new reference on the supplied event or to
 * the default event loop obtained from sd_event_default(). Three event
 * sources are then created:
 *   - an I/O source on rtnl->fd ("rtnl-receive-message") with io_callback,
 *     the given priority and prepare_callback as prepare handler,
 *   - a CLOCK_MONOTONIC timer source ("rtnl-timer") with time_callback and
 *     the given priority,
 *   - an exit source ("rtnl-exit") with exit_callback.
 * sd_rtnl_detach_event() is called in what appears to be the failure path.
 *
 * Precondition failures: -EINVAL for a NULL rtnl, -EBUSY when an event loop
 * is already attached.
 *
 * NOTE(review): the condition choosing between the supplied and the default
 * event loop, the error checks and the return values are not visible here. */
903 int sd_rtnl_attach_event(sd_rtnl *rtnl, sd_event *event, int priority) {
906 assert_return(rtnl, -EINVAL);
907 assert_return(!rtnl->event, -EBUSY);
909 assert(!rtnl->io_event_source);
910 assert(!rtnl->time_event_source);
913 rtnl->event = sd_event_ref(event);
915 r = sd_event_default(&rtnl->event);
/* I/O source: created with an empty event mask (0). */
920 r = sd_event_add_io(rtnl->event, &rtnl->io_event_source, rtnl->fd, 0, io_callback, rtnl);
924 r = sd_event_source_set_priority(rtnl->io_event_source, priority);
928 r = sd_event_source_set_description(rtnl->io_event_source, "rtnl-receive-message");
932 r = sd_event_source_set_prepare(rtnl->io_event_source, prepare_callback);
/* Timer source: created with time 0 and accuracy 0. */
936 r = sd_event_add_time(rtnl->event, &rtnl->time_event_source, CLOCK_MONOTONIC, 0, 0, time_callback, rtnl);
940 r = sd_event_source_set_priority(rtnl->time_event_source, priority);
944 r = sd_event_source_set_description(rtnl->time_event_source, "rtnl-timer");
948 r = sd_event_add_exit(rtnl->event, &rtnl->exit_event_source, exit_callback, rtnl);
952 r = sd_event_source_set_description(rtnl->exit_event_source, "rtnl-exit");
959 sd_rtnl_detach_event(rtnl);
/* Detach rtnl from its event loop.
 *
 * Each of the I/O, timer and exit event sources that is set is unreferenced
 * and its pointer replaced by the value sd_event_source_unref() returns; the
 * event loop reference is then dropped in the same way.
 *
 * Precondition failures: -EINVAL for a NULL rtnl, -ENXIO when no event loop
 * is attached.
 *
 * NOTE(review): the condition (if any) guarding the final sd_event_unref()
 * and the return value are not visible in this excerpt. */
963 int sd_rtnl_detach_event(sd_rtnl *rtnl) {
964 assert_return(rtnl, -EINVAL);
965 assert_return(rtnl->event, -ENXIO);
967 if (rtnl->io_event_source)
968 rtnl->io_event_source = sd_event_source_unref(rtnl->io_event_source);
970 if (rtnl->time_event_source)
971 rtnl->time_event_source = sd_event_source_unref(rtnl->time_event_source);
973 if (rtnl->exit_event_source)
974 rtnl->exit_event_source = sd_event_source_unref(rtnl->exit_event_source);
977 rtnl->event = sd_event_unref(rtnl->event);
/* Register callback to be invoked for incoming messages of a given type.
 *
 * A match_callback entry is allocated and filled with callback and userdata.
 * The socket joins broadcast groups through rtnl_join_broadcast_group(); the
 * calls visible here are for RTNLGRP_LINK, RTNLGRP_IPV4_IFADDR and
 * RTNLGRP_IPV6_IFADDR. Finally the entry is prepended to the match-callback
 * list of rtnl.
 *
 * Precondition failures: -EINVAL for a NULL rtnl or callback, -ECHILD when
 * used from another process.
 *
 * NOTE(review): some parameter declarations, the assignment of the entry's
 * type, the selection of which group is joined for which type, the error
 * checks and the return value are not visible here. c is declared
 * _cleanup_free_; how it is kept from being freed after it has been linked
 * into the list is likewise not shown. */
982 int sd_rtnl_add_match(sd_rtnl *rtnl,
984 sd_rtnl_message_handler_t callback,
986 _cleanup_free_ struct match_callback *c = NULL;
989 assert_return(rtnl, -EINVAL);
990 assert_return(callback, -EINVAL);
991 assert_return(!rtnl_pid_changed(rtnl), -ECHILD);
993 c = new0(struct match_callback, 1);
997 c->callback = callback;
999 c->userdata = userdata;
1006 r = rtnl_join_broadcast_group(rtnl, RTNLGRP_LINK);
1014 r = rtnl_join_broadcast_group(rtnl, RTNLGRP_IPV4_IFADDR);
1018 r = rtnl_join_broadcast_group(rtnl, RTNLGRP_IPV6_IFADDR);
1027 LIST_PREPEND(match_callbacks, rtnl->match_callbacks, c);
/* Remove a previously registered match callback.
 *
 * The match-callback list is searched for an entry whose callback, type and
 * userdata all equal the arguments; that entry is unlinked from the list. As
 * the comment below explains, broadcast group memberships are not dropped.
 *
 * Precondition failures: -EINVAL for a NULL rtnl or callback, -ECHILD when
 * used from another process.
 *
 * NOTE(review): some parameter declarations, the freeing of the unlinked
 * entry and the return values are not visible in this excerpt. */
1034 int sd_rtnl_remove_match(sd_rtnl *rtnl,
1036 sd_rtnl_message_handler_t callback,
1038 struct match_callback *c;
1040 assert_return(rtnl, -EINVAL);
1041 assert_return(callback, -EINVAL);
1042 assert_return(!rtnl_pid_changed(rtnl), -ECHILD);
1044 /* we should unsubscribe from the broadcast groups at this point, but it is not so
1045 trivial for a few reasons: the refcounting is a bit of a mess and not obvious
1046 how it will look like after we add genetlink support, and it is also not possible
1047 to query what broadcast groups were subscribed to when we inherit the socket to get
1048 the initial refcount. The latter could indeed be done for the first 32 broadcast
1049 groups (which incidentally is all we currently support in .socket units anyway),
1050 but we better not rely on only ever using 32 groups. */
1051 LIST_FOREACH(match_callbacks, c, rtnl->match_callbacks)
1052 if (c->callback == callback && c->type == type && c->userdata == userdata) {
1053 LIST_REMOVE(match_callbacks, rtnl->match_callbacks, c);