2 This file is part of systemd.
4 Copyright 2008-2012 Kay Sievers <kay@vrfy.org>
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
27 #include <sys/socket.h>
28 #include <linux/netlink.h>
29 #include <linux/filter.h>
32 #include "libudev-private.h"
33 #include "socket-util.h"
37 * SECTION:libudev-monitor
38 * @short_description: device event source
40 * Connects to a device event source.
46 * Opaque object handling an event source.
/*
 * Netlink addresses kept per monitor: snl is the monitor's own address (used
 * for bind() and as the target when another monitor sends to this one),
 * snl_trusted_sender holds the only pid accepted for unicast messages, and
 * snl_destination is the default target used when sending.
 */
52 union sockaddr_union snl;
53 union sockaddr_union snl_trusted_sender;
54 union sockaddr_union snl_destination;
/* match lists used to build the socket filter and re-checked in passes_filter() */
56 struct udev_list filter_subsystem_list;
57 struct udev_list filter_tag_list;
/*
 * Netlink multicast groups a monitor can use. This file refers to
 * UDEV_MONITOR_NONE, UDEV_MONITOR_KERNEL and UDEV_MONITOR_UDEV; their numeric
 * values are not visible in this listing.
 */
61 enum udev_monitor_netlink_group {
/* expected value of the magic field in struct udev_monitor_netlink_header */
67 #define UDEV_MONITOR_MAGIC 0xfeedcafe
/*
 * Header placed in front of the properties buffer in messages produced by
 * udev_monitor_send_device(). The socket filter built in
 * udev_monitor_filter_update() loads the magic, hash and bloom fields by
 * their offsetof() within this structure.
 */
68 struct udev_monitor_netlink_header {
69 /* "libudev" prefix to distinguish libudev and kernel messages */
72 * magic to protect against daemon <-> library message format mismatch
73 * used in the kernel from socket filter rules; needs to be stored in network order
76 /* total length of header structure known to the sender */
77 unsigned int header_size;
78 /* properties string buffer */
79 unsigned int properties_off;
80 unsigned int properties_len;
82 * hashes of primary device properties strings, to let libudev subscribers
83 * use in-kernel socket filters; values need to be stored in network order
85 unsigned int filter_subsystem_hash;
86 unsigned int filter_devtype_hash;
87 unsigned int filter_tag_bloom_hi;
88 unsigned int filter_tag_bloom_lo;
/*
 * Allocate a zero-initialized monitor with new0(), take the initial reference
 * and remember the udev context. Both filter lists start out initialized but
 * with different flags (false for subsystems, true for tags); the meaning of
 * that flag is defined by udev_list_init(), not in this file.
 */
91 static struct udev_monitor *udev_monitor_new(struct udev *udev)
93 struct udev_monitor *udev_monitor;
95 udev_monitor = new0(struct udev_monitor, 1);
96 if (udev_monitor == NULL)
98 udev_monitor->refcount = 1;
99 udev_monitor->udev = udev;
100 udev_list_init(udev, &udev_monitor->filter_subsystem_list, false);
101 udev_list_init(udev, &udev_monitor->filter_tag_list, true);
105 /* we consider udev running when /dev is on devtmpfs */
/*
 * Looks up the mount id of /dev with name_to_handle_at() and then scans
 * /proc/self/mountinfo line by line: the leading integer of each line is
 * parsed, and the text following " - " is tested for a "devtmpfs" prefix.
 * A name_to_handle_at() failure is logged unless errno is EOPNOTSUPP.
 * NOTE(review): the comparison of the parsed id against the /dev mount id and
 * the return statements are not visible in this listing -- confirm against
 * the full source.
 */
106 static bool udev_has_devtmpfs(struct udev *udev) {
108 union file_handle_union h = FILE_HANDLE_INIT;
109 _cleanup_fclose_ FILE *f = NULL;
110 char line[LINE_MAX], *e;
114 r = name_to_handle_at(AT_FDCWD, "/dev", &h.handle, &mount_id, 0);
116 if (errno != EOPNOTSUPP)
117 log_debug_errno(errno, "name_to_handle_at on /dev: %m");
121 f = fopen("/proc/self/mountinfo", "re");
125 FOREACH_LINE(line, f, return false) {
128 if (sscanf(line, "%i", &mid) != 1)
134 e = strstr(line, " - ");
138 /* accept any name that starts with the currently expected type */
139 if (startswith(e + 3, "devtmpfs"))
/*
 * Create a monitor for the event source @name, optionally on top of a socket
 * passed in as @fd.
 *
 * Group selection: "udev" selects UDEV_MONITOR_UDEV, unless
 * /run/udev/control is inaccessible and /dev is not on devtmpfs, in which
 * case UDEV_MONITOR_NONE is used; "kernel" selects UDEV_MONITOR_KERNEL.
 * There is one more branch that selects UDEV_MONITOR_NONE whose condition is
 * not visible here (presumably name == NULL -- TODO confirm).
 *
 * Socket: either a new non-blocking, close-on-exec NETLINK_KOBJECT_UEVENT
 * socket is opened, or @fd is adopted and the monitor is marked as already
 * bound. The condition choosing between the two is not visible in this
 * listing (presumably fd < 0 -- TODO confirm).
 */
146 struct udev_monitor *udev_monitor_new_from_netlink_fd(struct udev *udev, const char *name, int fd)
148 struct udev_monitor *udev_monitor;
155 group = UDEV_MONITOR_NONE;
156 else if (streq(name, "udev")) {
158 * We do not support subscribing to uevents if no instance of
159 * udev is running. Uevents would otherwise broadcast the
160 * processing data of the host into containers, which is not
163 * Containers will currently not get any udev uevents, until
164 * a supporting infrastructure is available.
166 * We do not set a netlink multicast group here, so the socket
167 * will not receive any messages.
169 if (access("/run/udev/control", F_OK) < 0 && !udev_has_devtmpfs(udev)) {
170 log_debug("the udev service seems not to be active, disable the monitor");
171 group = UDEV_MONITOR_NONE;
173 group = UDEV_MONITOR_UDEV;
174 } else if (streq(name, "kernel"))
175 group = UDEV_MONITOR_KERNEL;
179 udev_monitor = udev_monitor_new(udev);
180 if (udev_monitor == NULL)
184 udev_monitor->sock = socket(PF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, NETLINK_KOBJECT_UEVENT);
185 if (udev_monitor->sock == -1) {
186 log_debug_errno(errno, "error getting socket: %m");
191 udev_monitor->bound = true;
192 udev_monitor->sock = fd;
/* own address: the multicast group chosen above is what bind() will subscribe to */
195 udev_monitor->snl.nl.nl_family = AF_NETLINK;
196 udev_monitor->snl.nl.nl_groups = group;
198 /* default destination for sending */
199 udev_monitor->snl_destination.nl.nl_family = AF_NETLINK;
200 udev_monitor->snl_destination.nl.nl_groups = UDEV_MONITOR_UDEV;
206 * udev_monitor_new_from_netlink:
207 * @udev: udev library context
208 * @name: name of event source
210 * Create new udev monitor and connect to a specified event
211 * source. Valid source identifiers are "udev" and "kernel".
213 * Applications should usually not connect directly to the
214 * "kernel" events, because the devices might not be useable
215 * at that time, before udev has configured them, and created
216 * device nodes. Accessing devices at the same time as udev,
217 * might result in unpredictable behavior. The "udev" events
218 * are sent out after udev has finished its event processing,
219 * all rules have been processed, and needed device nodes are
222 * The initial refcount is 1, and needs to be decremented to
223 * release the resources of the udev monitor.
225 * Returns: a new udev monitor, or #NULL, in case of an error
/* public entry point: forwards to udev_monitor_new_from_netlink_fd() with -1 passed as fd */
227 _public_ struct udev_monitor *udev_monitor_new_from_netlink(struct udev *udev, const char *name)
229 return udev_monitor_new_from_netlink_fd(udev, name, -1);
/*
 * Helper used by udev_monitor_filter_update() to emit one non-jump BPF
 * instruction (@code with operand @data) into slot *@i of @inss.
 * NOTE(review): the field assignments and the advance of *@i are not visible
 * in this listing; callers rely on *@i moving to the next slot.
 */
232 static inline void bpf_stmt(struct sock_filter *inss, unsigned int *i,
233 unsigned short code, unsigned int data)
235 struct sock_filter *ins = &inss[*i];
/*
 * Helper used by udev_monitor_filter_update() to emit one BPF jump
 * instruction into slot *@i of @inss: @code with operand @data, and the
 * relative jump distances @jt (condition true) and @jf (condition false).
 * NOTE(review): the field assignments are not visible in this listing.
 */
242 static inline void bpf_jmp(struct sock_filter *inss, unsigned int *i,
243 unsigned short code, unsigned int data,
244 unsigned short jt, unsigned short jf)
246 struct sock_filter *ins = &inss[*i];
256 * udev_monitor_filter_update:
257 * @udev_monitor: monitor
259 * Update the installed socket filter. This is only needed,
260 * if the filter was removed or changed.
262 * Returns: 0 on success, otherwise a negative error value.
/*
 * Builds a socket filter program in ins[] from the two match lists and
 * attaches it to the monitor socket with SO_ATTACH_FILTER.
 *
 * Program layout, in order:
 *   1. messages whose magic field is not UDEV_MONITOR_MAGIC are passed;
 *   2. if tags are configured: per tag, both bloom words of the message are
 *      masked with the tag's bits and compared; a message matching no tag
 *      is dropped;
 *   3. if subsystems are configured: per entry, the subsystem hash (and the
 *      devtype hash when the entry has a value) is compared; the first match
 *      passes the message, no match drops it;
 *   4. otherwise the message is passed.
 *
 * The check at the top covers the case where both lists are empty; what is
 * done in that case is not visible in this listing (presumably an early
 * return -- TODO confirm).
 *
 * NOTE(review): the only visible capacity check against ELEMENTSOF(ins) is
 * inside the subsystem loop. No check is visible in the tag loop, which emits
 * six instructions per tag -- confirm that a long tag list cannot overrun
 * ins[512].
 */
264 _public_ int udev_monitor_filter_update(struct udev_monitor *udev_monitor)
266 struct sock_filter ins[512];
267 struct sock_fprog filter;
269 struct udev_list_entry *list_entry;
272 if (udev_list_get_entry(&udev_monitor->filter_subsystem_list) == NULL &&
273 udev_list_get_entry(&udev_monitor->filter_tag_list) == NULL)
276 memzero(ins, sizeof(ins));
279 /* load magic in A */
280 bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(struct udev_monitor_netlink_header, magic));
281 /* jump if magic matches */
282 bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, UDEV_MONITOR_MAGIC, 1, 0);
283 /* wrong magic, pass packet */
284 bpf_stmt(ins, &i, BPF_RET|BPF_K, 0xffffffff);
286 if (udev_list_get_entry(&udev_monitor->filter_tag_list) != NULL) {
289 /* count tag matches, to calculate end of tag match block */
291 udev_list_entry_foreach(list_entry, udev_list_get_entry(&udev_monitor->filter_tag_list))
294 /* add all tags matches */
295 udev_list_entry_foreach(list_entry, udev_list_get_entry(&udev_monitor->filter_tag_list)) {
296 uint64_t tag_bloom_bits = util_string_bloom64(udev_list_entry_get_name(list_entry));
297 uint32_t tag_bloom_hi = tag_bloom_bits >> 32;
298 uint32_t tag_bloom_lo = tag_bloom_bits & 0xffffffff;
300 /* load device bloom bits in A */
301 bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(struct udev_monitor_netlink_header, filter_tag_bloom_hi));
302 /* clear bits (tag bits & bloom bits) */
303 bpf_stmt(ins, &i, BPF_ALU|BPF_AND|BPF_K, tag_bloom_hi);
304 /* jump to next tag if it does not match */
305 bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, tag_bloom_hi, 0, 3);
307 /* load device bloom bits in A */
308 bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(struct udev_monitor_netlink_header, filter_tag_bloom_lo));
309 /* clear bits (tag bits & bloom bits) */
310 bpf_stmt(ins, &i, BPF_ALU|BPF_AND|BPF_K, tag_bloom_lo);
311 /* jump behind end of tag match block if tag matches */
313 bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, tag_bloom_lo, 1 + (tag_matches * 6), 0);
316 /* nothing matched, drop packet */
317 bpf_stmt(ins, &i, BPF_RET|BPF_K, 0);
320 /* add all subsystem matches */
321 if (udev_list_get_entry(&udev_monitor->filter_subsystem_list) != NULL) {
322 udev_list_entry_foreach(list_entry, udev_list_get_entry(&udev_monitor->filter_subsystem_list)) {
323 unsigned int hash = util_string_hash32(udev_list_entry_get_name(list_entry));
325 /* load device subsystem value in A */
326 bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(struct udev_monitor_netlink_header, filter_subsystem_hash));
327 if (udev_list_entry_get_value(list_entry) == NULL) {
328 /* jump if subsystem does not match */
329 bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, hash, 0, 1);
331 /* jump if subsystem does not match */
332 bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, hash, 0, 3);
334 /* load device devtype value in A */
335 bpf_stmt(ins, &i, BPF_LD|BPF_W|BPF_ABS, offsetof(struct udev_monitor_netlink_header, filter_devtype_hash));
336 /* jump if value does not match */
337 hash = util_string_hash32(udev_list_entry_get_value(list_entry));
338 bpf_jmp(ins, &i, BPF_JMP|BPF_JEQ|BPF_K, hash, 0, 1);
341 /* matched, pass packet */
342 bpf_stmt(ins, &i, BPF_RET|BPF_K, 0xffffffff);
/* keep room for the instruction emitted after this loop */
344 if (i+1 >= ELEMENTSOF(ins))
348 /* nothing matched, drop packet */
349 bpf_stmt(ins, &i, BPF_RET|BPF_K, 0);
352 /* matched, pass packet */
353 bpf_stmt(ins, &i, BPF_RET|BPF_K, 0xffffffff);
356 memzero(&filter, sizeof(filter));
359 err = setsockopt(udev_monitor->sock, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter));
/* a setsockopt() failure is reported as negative errno */
360 return err < 0 ? -errno : 0;
/*
 * Record @sender's netlink pid as the trusted unicast sender of
 * @udev_monitor; udev_monitor_receive_device() ignores unicast messages
 * coming from any other pid.
 */
363 int udev_monitor_allow_unicast_sender(struct udev_monitor *udev_monitor, struct udev_monitor *sender)
365 udev_monitor->snl_trusted_sender.nl.nl_pid = sender->snl.nl.nl_pid;
369 * udev_monitor_enable_receiving:
370 * @udev_monitor: the monitor which should receive events
372 * Binds the @udev_monitor socket to the event source.
374 * Returns: 0 on success, otherwise a negative error value.
/*
 * Steps performed: install the current socket filter, bind() the socket to
 * the monitor's own address unless it is already marked bound, copy the
 * netlink pid reported by getsockname() into that address, and switch on
 * SO_PASSCRED. Failures of bind() and of setting SO_PASSCRED are logged at
 * debug level.
 * NOTE(review): the result of udev_monitor_filter_update() is not used in
 * the visible code; the return statements are not visible in this listing.
 */
376 _public_ int udev_monitor_enable_receiving(struct udev_monitor *udev_monitor)
381 udev_monitor_filter_update(udev_monitor);
383 if (!udev_monitor->bound) {
384 err = bind(udev_monitor->sock,
385 &udev_monitor->snl.sa, sizeof(struct sockaddr_nl));
387 udev_monitor->bound = true;
391 union sockaddr_union snl;
395 * get the address the kernel has assigned us
396 * it is usually, but not necessarily the pid
398 addrlen = sizeof(struct sockaddr_nl);
399 err = getsockname(udev_monitor->sock, &snl.sa, &addrlen);
401 udev_monitor->snl.nl.nl_pid = snl.nl.nl_pid;
403 log_debug_errno(errno, "bind failed: %m");
407 /* enable receiving of sender credentials */
408 err = setsockopt(udev_monitor->sock, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
410 log_debug_errno(errno, "setting SO_PASSCRED failed: %m");
416 * udev_monitor_set_receive_buffer_size:
417 * @udev_monitor: the monitor which should receive events
418 * @size: the size in bytes
420 * Set the size of the kernel socket buffer. This call needs the
421 * appropriate privileges to succeed.
423 * Returns: 0 on success, otherwise -1 on error.
/*
 * Thin wrapper around setsockopt(SO_RCVBUFFORCE) on the monitor socket; the
 * setsockopt() result is returned unchanged. A NULL monitor is handled
 * separately; the value returned in that case is not visible in this listing.
 */
425 _public_ int udev_monitor_set_receive_buffer_size(struct udev_monitor *udev_monitor, int size)
427 if (udev_monitor == NULL)
429 return setsockopt(udev_monitor->sock, SOL_SOCKET, SO_RCVBUFFORCE, &size, sizeof(size));
/*
 * Close the monitor socket and mark it as closed by storing -1, which is what
 * udev_monitor_unref() tests before closing. A close() failure is reported
 * as negative errno.
 */
432 int udev_monitor_disconnect(struct udev_monitor *udev_monitor)
436 err = close(udev_monitor->sock);
437 udev_monitor->sock = -1;
438 return err < 0 ? -errno : 0;
443 * @udev_monitor: udev monitor
445 * Take a reference of a udev monitor.
447 * Returns: the passed udev monitor
/*
 * Increment the reference count. A NULL monitor is special-cased before the
 * increment; the return statements are not visible in this listing.
 */
449 _public_ struct udev_monitor *udev_monitor_ref(struct udev_monitor *udev_monitor)
451 if (udev_monitor == NULL)
453 udev_monitor->refcount++;
458 * udev_monitor_unref:
459 * @udev_monitor: udev monitor
461 * Drop a reference of a udev monitor. If the refcount reaches zero,
462 * the bound socket will be closed, and the resources of the monitor
/*
 * Drop one reference. While references remain, the teardown below is
 * presumably skipped (the statement under that check is not visible). On the
 * last reference the socket is closed if it is still open (sock >= 0) and
 * both filter lists are emptied.
 * NOTE(review): releasing the monitor structure itself and the return
 * statements are not visible in this listing.
 */
467 _public_ struct udev_monitor *udev_monitor_unref(struct udev_monitor *udev_monitor)
469 if (udev_monitor == NULL)
471 udev_monitor->refcount--;
472 if (udev_monitor->refcount > 0)
474 if (udev_monitor->sock >= 0)
475 close(udev_monitor->sock);
476 udev_list_cleanup(&udev_monitor->filter_subsystem_list);
477 udev_list_cleanup(&udev_monitor->filter_tag_list);
483 * udev_monitor_get_udev:
484 * @udev_monitor: udev monitor
486 * Retrieve the udev library context the monitor was created with.
488 * Returns: the udev library context
/*
 * Accessor for the udev context stored on the monitor by udev_monitor_new().
 * A NULL monitor is handled separately; the value returned in that case is
 * not visible in this listing.
 */
490 _public_ struct udev *udev_monitor_get_udev(struct udev_monitor *udev_monitor)
492 if (udev_monitor == NULL)
494 return udev_monitor->udev;
498 * udev_monitor_get_fd:
499 * @udev_monitor: udev monitor
501 * Retrieve the socket file descriptor associated with the monitor.
503 * Returns: the socket file descriptor
/*
 * Accessor for the monitor's socket descriptor; this is -1 after
 * udev_monitor_disconnect(). A NULL monitor is handled separately; the value
 * returned in that case is not visible in this listing.
 */
505 _public_ int udev_monitor_get_fd(struct udev_monitor *udev_monitor)
507 if (udev_monitor == NULL)
509 return udev_monitor->sock;
/*
 * Userspace check of a received device against the monitor's match lists,
 * using string comparison rather than the hashes used by the socket filter.
 *
 * Subsystem list: each entry's name is compared with the device subsystem;
 * the entry's value, if any, is compared with the device devtype.
 * Tag list: the device is tested for each listed tag with
 * udev_device_has_tag().
 *
 * NOTE(review): the statements executed under each check (continue / return)
 * are not visible in this listing, so the exact combination of the two lists
 * must be confirmed against the full source. Callers treat a zero result as
 * "does not pass".
 */
512 static int passes_filter(struct udev_monitor *udev_monitor, struct udev_device *udev_device)
514 struct udev_list_entry *list_entry;
516 if (udev_list_get_entry(&udev_monitor->filter_subsystem_list) == NULL)
518 udev_list_entry_foreach(list_entry, udev_list_get_entry(&udev_monitor->filter_subsystem_list)) {
519 const char *subsys = udev_list_entry_get_name(list_entry);
520 const char *dsubsys = udev_device_get_subsystem(udev_device);
522 const char *ddevtype;
524 if (!streq(dsubsys, subsys))
527 devtype = udev_list_entry_get_value(list_entry);
530 ddevtype = udev_device_get_devtype(udev_device);
531 if (ddevtype == NULL)
533 if (streq(ddevtype, devtype))
539 if (udev_list_get_entry(&udev_monitor->filter_tag_list) == NULL)
541 udev_list_entry_foreach(list_entry, udev_list_get_entry(&udev_monitor->filter_tag_list)) {
542 const char *tag = udev_list_entry_get_name(list_entry);
544 if (udev_device_has_tag(udev_device, tag))
551 * udev_monitor_receive_device:
552 * @udev_monitor: udev monitor
554 * Receive data from the udev monitor socket, allocate a new udev
555 * device, fill in the received data, and return the device.
557 * Only socket connections with uid=0 are accepted.
559 * The monitor socket is by default set to NONBLOCK. A variant of poll() on
560 * the file descriptor returned by udev_monitor_get_fd() should be used to
561 * wake up when new devices arrive, or alternatively the file descriptor
562 * switched into blocking mode.
564 * The initial refcount is 1, and needs to be decremented to
565 * release the resources of the udev device.
567 * Returns: a new udev device, or #NULL, in case of an error
/*
 * Reads one message from the monitor socket with recvmsg(), collecting the
 * sender address and the credentials control message, and validates it
 * before turning it into a udev_device:
 *
 *   - messages shorter than 32 bytes or flagged MSG_TRUNC are rejected;
 *   - unicast messages (nl_groups == 0) are accepted only from the pid stored
 *     by udev_monitor_allow_unicast_sender();
 *   - multicast messages on the kernel group must come from netlink pid 0;
 *   - an SCM_CREDENTIALS control message with uid 0 is required;
 *   - messages starting with "libudev" must carry the expected magic and a
 *     properties offset that lies inside the received data;
 *   - any other message must start with a NUL-terminated header string
 *     containing "@/", followed by the properties.
 *
 * The properties are handed to udev_device_new_from_nulstr(). A device that
 * does not pass passes_filter() is released, and poll() with a zero timeout
 * is used to see whether another message is already queued.
 * NOTE(review): the statements taken on each rejection, and what is done with
 * the poll() result, are not visible in this listing.
 */
569 _public_ struct udev_device *udev_monitor_receive_device(struct udev_monitor *udev_monitor)
571 struct udev_device *udev_device;
574 char cred_msg[CMSG_SPACE(sizeof(struct ucred))];
575 struct cmsghdr *cmsg;
576 union sockaddr_union snl;
579 struct udev_monitor_netlink_header nlh;
584 bool is_initialized = false;
587 if (udev_monitor == NULL)
590 iov.iov_len = sizeof(buf);
591 memzero(&smsg, sizeof(struct msghdr));
594 smsg.msg_control = cred_msg;
595 smsg.msg_controllen = sizeof(cred_msg);
596 smsg.msg_name = &snl;
597 smsg.msg_namelen = sizeof(snl);
599 buflen = recvmsg(udev_monitor->sock, &smsg, 0);
602 log_debug("unable to receive message");
606 if (buflen < 32 || (smsg.msg_flags & MSG_TRUNC)) {
607 log_debug("invalid message length");
611 if (snl.nl.nl_groups == 0) {
612 /* unicast message, check if we trust the sender */
613 if (udev_monitor->snl_trusted_sender.nl.nl_pid == 0 ||
614 snl.nl.nl_pid != udev_monitor->snl_trusted_sender.nl.nl_pid) {
615 log_debug("unicast netlink message ignored");
618 } else if (snl.nl.nl_groups == UDEV_MONITOR_KERNEL) {
619 if (snl.nl.nl_pid > 0) {
620 log_debug("multicast kernel netlink message from PID %"PRIu32" ignored",
626 cmsg = CMSG_FIRSTHDR(&smsg);
627 if (cmsg == NULL || cmsg->cmsg_type != SCM_CREDENTIALS) {
628 log_debug("no sender credentials received, message ignored");
632 cred = (struct ucred *)CMSG_DATA(cmsg);
633 if (cred->uid != 0) {
634 log_debug("sender uid="UID_FMT", message ignored", cred->uid);
/* the length 8 includes the terminating NUL of "libudev" */
638 if (memcmp(buf.raw, "libudev", 8) == 0) {
639 /* udev message needs proper version magic */
640 if (buf.nlh.magic != htonl(UDEV_MONITOR_MAGIC)) {
641 log_debug("unrecognized message signature (%x != %x)",
642 buf.nlh.magic, htonl(UDEV_MONITOR_MAGIC));
645 if (buf.nlh.properties_off+32 > (size_t)buflen) {
649 bufpos = buf.nlh.properties_off;
651 /* devices received from udev are always initialized */
652 is_initialized = true;
654 /* kernel message with header */
655 bufpos = strlen(buf.raw) + 1;
656 if ((size_t)bufpos < sizeof("a@/d") || bufpos >= buflen) {
657 log_debug("invalid message length");
661 /* check message header */
662 if (strstr(buf.raw, "@/") == NULL) {
663 log_debug("unrecognized message header");
668 udev_device = udev_device_new_from_nulstr(udev_monitor->udev, &buf.raw[bufpos], buflen - bufpos);
673 udev_device_set_is_initialized(udev_device);
675 /* skip device, if it does not pass the current filter */
676 if (!passes_filter(udev_monitor, udev_device)) {
677 struct pollfd pfd[1];
680 udev_device_unref(udev_device);
682 /* if something is queued, get next device */
683 pfd[0].fd = udev_monitor->sock;
684 pfd[0].events = POLLIN;
685 rc = poll(pfd, 1, 0);
/*
 * Serializes @udev_device and sends it over @udev_monitor's socket.
 *
 * The message consists of two iovec parts: a udev_monitor_netlink_header
 * (prefix "libudev", magic, subsystem/devtype hashes and tag bloom bits, all
 * filter fields in network byte order) followed by the properties buffer
 * obtained from udev_device_get_properties_monitor_buf().
 *
 * The target address is @destination's own netlink address when @destination
 * is given, otherwise the monitor's default destination.
 *
 * NOTE(review): in the visible code the subsystem string is passed to
 * util_string_hash32() without a NULL check -- confirm the subsystem is
 * always set for devices sent through here. The handling of the sendmsg()
 * result beyond logging is not visible in this listing.
 */
694 int udev_monitor_send_device(struct udev_monitor *udev_monitor,
695 struct udev_monitor *destination, struct udev_device *udev_device)
703 struct udev_monitor_netlink_header nlh;
704 struct udev_list_entry *list_entry;
705 uint64_t tag_bloom_bits;
707 blen = udev_device_get_properties_monitor_buf(udev_device, &buf);
711 /* add versioned header */
712 memzero(&nlh, sizeof(struct udev_monitor_netlink_header));
713 memcpy(nlh.prefix, "libudev", 8);
714 nlh.magic = htonl(UDEV_MONITOR_MAGIC);
715 nlh.header_size = sizeof(struct udev_monitor_netlink_header);
716 val = udev_device_get_subsystem(udev_device);
717 nlh.filter_subsystem_hash = htonl(util_string_hash32(val));
718 val = udev_device_get_devtype(udev_device);
720 nlh.filter_devtype_hash = htonl(util_string_hash32(val));
721 iov[0].iov_base = &nlh;
722 iov[0].iov_len = sizeof(struct udev_monitor_netlink_header);
724 /* add tag bloom filter */
726 udev_list_entry_foreach(list_entry, udev_device_get_tags_list_entry(udev_device))
727 tag_bloom_bits |= util_string_bloom64(udev_list_entry_get_name(list_entry));
728 if (tag_bloom_bits > 0) {
729 nlh.filter_tag_bloom_hi = htonl(tag_bloom_bits >> 32);
730 nlh.filter_tag_bloom_lo = htonl(tag_bloom_bits & 0xffffffff);
733 /* add properties list */
/* the properties start directly behind the header */
734 nlh.properties_off = iov[0].iov_len;
735 nlh.properties_len = blen;
736 iov[1].iov_base = (char *)buf;
737 iov[1].iov_len = blen;
739 memzero(&smsg, sizeof(struct msghdr));
743 * Use custom address for target, or the default one.
745 * If we send to a multicast group, we will get
746 * ECONNREFUSED, which is expected.
748 if (destination != NULL)
749 smsg.msg_name = &destination->snl;
751 smsg.msg_name = &udev_monitor->snl_destination;
752 smsg.msg_namelen = sizeof(struct sockaddr_nl);
753 count = sendmsg(udev_monitor->sock, &smsg, 0);
754 log_debug("passed %zi bytes to netlink monitor %p", count, udev_monitor);
759 * udev_monitor_filter_add_match_subsystem_devtype:
760 * @udev_monitor: the monitor
761 * @subsystem: the subsystem value to match the incoming devices against
762 * @devtype: the devtype value to match the incoming devices against
764 * This filter is efficiently executed inside the kernel, and libudev subscribers
765 * will usually not be woken up for devices which do not match.
767 * The filter must be installed before the monitor is switched to listening mode.
769 * Returns: 0 on success, otherwise a negative error value.
/*
 * Appends a (subsystem, devtype) pair to the subsystem match list: the
 * subsystem is stored as the entry name and the devtype as its value.
 * @udev_monitor and @subsystem are checked for NULL first; a NULL result
 * from udev_list_entry_add() is treated as failure. The specific error
 * values returned are not visible in this listing.
 */
771 _public_ int udev_monitor_filter_add_match_subsystem_devtype(struct udev_monitor *udev_monitor, const char *subsystem, const char *devtype)
773 if (udev_monitor == NULL)
775 if (subsystem == NULL)
777 if (udev_list_entry_add(&udev_monitor->filter_subsystem_list, subsystem, devtype) == NULL)
783 * udev_monitor_filter_add_match_tag:
784 * @udev_monitor: the monitor
785 * @tag: the name of a tag
787 * This filter is efficiently executed inside the kernel, and libudev subscribers
788 * will usually not be woken up for devices which do not match.
790 * The filter must be installed before the monitor is switched to listening mode.
792 * Returns: 0 on success, otherwise a negative error value.
/*
 * Appends @tag to the tag match list as an entry name without a value.
 * @udev_monitor is checked for NULL first; a NULL result from
 * udev_list_entry_add() is treated as failure. The specific error values
 * returned, and any check of @tag itself, are not visible in this listing.
 */
794 _public_ int udev_monitor_filter_add_match_tag(struct udev_monitor *udev_monitor, const char *tag)
796 if (udev_monitor == NULL)
800 if (udev_list_entry_add(&udev_monitor->filter_tag_list, tag, NULL) == NULL)
806 * udev_monitor_filter_remove:
807 * @udev_monitor: monitor
809 * Remove all filters from monitor.
811 * Returns: 0 on success, otherwise a negative error value.
813 _public_ int udev_monitor_filter_remove(struct udev_monitor *udev_monitor)
815 static struct sock_fprog filter = { 0, NULL };
817 udev_list_cleanup(&udev_monitor->filter_subsystem_list);
818 return setsockopt(udev_monitor->sock, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter));