+/* Releases an inotify_data object belonging to event loop 'e': unlinks it from the loop's bookkeeping,
+ * detaches its inotify fd from the epoll instance, closes that fd and frees the memory. All inodes and watch
+ * descriptors must have been dropped by the caller already. A NULL 'd' is accepted and ignored. */
+static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
+        assert(e);
+
+        if (d == NULL)
+                return;
+
+        /* Nothing may be registered on this object anymore */
+        assert(hashmap_isempty(d->inodes));
+        assert(hashmap_isempty(d->wd));
+
+        /* Only objects with a non-empty buffer are unlinked from the loop's "buffered" list */
+        if (d->buffer_filled > 0)
+                LIST_REMOVE(buffered, e->inotify_data_buffered, d);
+
+        hashmap_free(d->inodes);
+        hashmap_free(d->wd);
+
+        /* The event loop indexes these objects by their priority */
+        assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
+
+        /* No fd (anymore)? Then there is nothing to detach from epoll or to close. */
+        if (d->fd < 0) {
+                free(d);
+                return;
+        }
+
+        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
+                log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
+
+        safe_close(d->fd);
+        free(d);
+}
+
+/* Looks up the inotify_data object event loop 'e' keeps for 'priority', creating it if there is none yet.
+ *
+ * A newly created object gets its own non-blocking, close-on-exec inotify fd, is entered into
+ * e->inotify_data (keyed by priority) and has its fd registered for EPOLLIN on the loop's epoll fd.
+ *
+ * If 'ret' is non-NULL it receives the object on success.
+ *
+ * Returns 0 if an existing object was found, 1 if a new one was created, and a negative errno-style value
+ * on failure (in which case nothing is left behind). */
+static int event_make_inotify_data(
+                sd_event *e,
+                int64_t priority,
+                struct inotify_data **ret) {
+
+        _cleanup_close_ int fd = -1;
+        struct inotify_data *d;
+        struct epoll_event ev;
+        int r;
+
+        assert(e);
+
+        d = hashmap_get(e->inotify_data, &priority);
+        if (d) {
+                if (ret)
+                        *ret = d;
+                return 0;
+        }
+
+        /* inotify_init1() has its own flag namespace: use IN_CLOEXEC rather than open()'s O_CLOEXEC */
+        fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+        if (fd < 0)
+                return -errno;
+
+        fd = fd_move_above_stdio(fd);
+
+        r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
+        if (r < 0)
+                return r;
+
+        d = new(struct inotify_data, 1);
+        if (!d)
+                return -ENOMEM;
+
+        /* From here on the fd is owned by the object, no longer by the cleanup handler of 'fd' */
+        *d = (struct inotify_data) {
+                .wakeup = WAKEUP_INOTIFY_DATA,
+                .fd = TAKE_FD(fd),
+                .priority = priority,
+        };
+
+        /* The key points into the object itself, hence the entry must go before the object is freed */
+        r = hashmap_put(e->inotify_data, &d->priority, d);
+        if (r < 0) {
+                d->fd = safe_close(d->fd);
+                free(d);
+                return r;
+        }
+
+        ev = (struct epoll_event) {
+                .events = EPOLLIN,
+                .data.ptr = d,
+        };
+
+        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
+                r = -errno;
+                d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
+                                            * remove the fd from the epoll first, which we don't want as we couldn't
+                                            * add it in the first place. */
+                event_free_inotify_data(e, d);
+                return r;
+        }
+
+        if (ret)
+                *ret = d;
+
+        return 1;
+}
+
+/* Three-way comparison of two struct inode_data objects: ordered by device number first, by inode number
+ * second. Returns -1, 0 or 1. */
+static int inode_data_compare(const void *a, const void *b) {
+        const struct inode_data *lhs = a, *rhs = b;
+
+        assert(lhs);
+        assert(rhs);
+
+        if (lhs->dev != rhs->dev)
+                return lhs->dev < rhs->dev ? -1 : 1;
+
+        if (lhs->ino != rhs->ino)
+                return lhs->ino < rhs->ino ? -1 : 1;
+
+        return 0;
+}
+
+/* Hash function for struct inode_data keys: feeds the device number and then the inode number into the
+ * siphash state, i.e. the same two fields inode_data_compare() looks at. */
+static void inode_data_hash_func(const void *p, struct siphash *state) {
+        const struct inode_data *inode;
+
+        assert(p);
+        inode = p;
+
+        siphash24_compress(&inode->dev, sizeof(inode->dev), state);
+        siphash24_compress(&inode->ino, sizeof(inode->ino), state);
+}
+
+/* Hash operations for hashmaps keyed by struct inode_data, i.e. by (device, inode) pair */
+const struct hash_ops inode_data_hash_ops = {
+        .hash    = inode_data_hash_func,
+        .compare = inode_data_compare,
+};
+
+/* Releases an inode_data object that has no event sources attached anymore: closes the inode fd if it is
+ * still open (taking the object off the loop's "to close" list), removes the kernel watch, drops the object
+ * from the hashmaps of the inotify_data object it belongs to, and frees it. A NULL 'd' is accepted and
+ * ignored. */
+static void event_free_inode_data(
+                sd_event *e,
+                struct inode_data *d) {
+
+        struct inotify_data *owner;
+
+        assert(e);
+
+        if (d == NULL)
+                return;
+
+        assert(!d->event_sources);
+
+        if (d->fd >= 0) {
+                LIST_REMOVE(to_close, e->inode_data_to_close, d);
+                safe_close(d->fd);
+        }
+
+        owner = d->inotify_data;
+        if (owner) {
+
+                if (d->wd >= 0) {
+                        /* The watch descriptor may already be invalid by the time we get here, because an
+                         * IN_IGNORED event might be queued at the very moment we enter the syscall. EINVAL is
+                         * hence a very likely outcome and is ignored silently; everything else is merely
+                         * logged. */
+                        if (owner->fd >= 0 &&
+                            inotify_rm_watch(owner->fd, d->wd) < 0 &&
+                            errno != EINVAL)
+                                log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
+
+                        assert_se(hashmap_remove(owner->wd, INT_TO_PTR(d->wd)) == d);
+                }
+
+                assert_se(hashmap_remove(owner->inodes, d) == d);
+        }
+
+        free(d);
+}
+
+/* Garbage collects an inode_data object: if no event source refers to it anymore it is freed, and if that
+ * leaves the inotify_data object it belonged to without any inodes, that one is freed too. Does nothing
+ * for a NULL 'd' or while event sources are still attached. */
+static void event_gc_inode_data(
+                sd_event *e,
+                struct inode_data *d) {
+
+        struct inotify_data *owner;
+
+        assert(e);
+
+        if (!d || d->event_sources)
+                return;
+
+        /* Remember the owner first, 'd' is gone after the next call */
+        owner = d->inotify_data;
+        event_free_inode_data(e, d);
+
+        if (!owner)
+                return;
+
+        if (hashmap_isempty(owner->inodes))
+                event_free_inotify_data(e, owner);
+}
+
+/* Looks up the inode_data object for the inode (dev, ino) inside 'inotify_data', creating it if there is
+ * none yet. A newly created object starts out without a watch descriptor and without an fd (both -1).
+ *
+ * If 'ret' is non-NULL it receives the object on success.
+ *
+ * Returns 0 if an existing object was found, 1 if a new one was created, and a negative errno-style value
+ * on failure. */
+static int event_make_inode_data(
+                sd_event *e,
+                struct inotify_data *inotify_data,
+                dev_t dev,
+                ino_t ino,
+                struct inode_data **ret) {
+
+        /* Only the two fields the hash ops look at matter for the lookup key */
+        struct inode_data key = {
+                .dev = dev,
+                .ino = ino,
+        };
+        struct inode_data *found, *created;
+        int r;
+
+        assert(e);
+        assert(inotify_data);
+
+        found = hashmap_get(inotify_data->inodes, &key);
+        if (found) {
+                if (ret)
+                        *ret = found;
+
+                return 0;
+        }
+
+        r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
+        if (r < 0)
+                return r;
+
+        created = new(struct inode_data, 1);
+        if (!created)
+                return -ENOMEM;
+
+        *created = (struct inode_data) {
+                .inotify_data = inotify_data,
+                .dev = dev,
+                .ino = ino,
+                .wd = -1,
+                .fd = -1,
+        };
+
+        /* The object serves as its own key */
+        r = hashmap_put(inotify_data->inodes, created, created);
+        if (r < 0) {
+                free(created);
+                return r;
+        }
+
+        if (ret)
+                *ret = created;
+
+        return 1;
+}
+
+/* Computes the inotify mask to subscribe to for inode 'd', derived from the masks of all event sources
+ * attached to it. */
+static uint32_t inode_data_determine_mask(struct inode_data *d) {
+        uint32_t excl_unlink = IN_EXCL_UNLINK, events = 0;
+        sd_event_source *s;
+
+        assert(d);
+
+        /* The watch masks of all event sources watching this inode are generally ORed together; only the
+         * IN_EXCL_UNLINK flag is ANDed, i.e. it survives only if every single source asked for it.
+         *
+         * Note that all sources contribute to the mask here, regardless whether they are enabled, disabled or
+         * oneshot. That's because we cannot change the mask anymore after the event source was created once,
+         * since the kernel has no API for that. Hence we need to subscribe to the maximum mask we ever might
+         * be interested in, and suppress events we don't care for client-side. */
+
+        LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
+                excl_unlink &= s->inotify.mask;
+                events |= s->inotify.mask;
+        }
+
+        /* Strip the flags that are not passed on as collected; IN_EXCL_UNLINK is re-added from the ANDed value */
+        events &= ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK);
+
+        return events | excl_unlink;
+}
+
+/* Makes sure the kernel watch for inode 'd' exists and covers the combined mask of all event sources
+ * attached to it. Requires that the inode's fd is still open (d->fd >= 0).
+ *
+ * Returns 0 if a watch with the right mask was already in place, 1 if the watch was added or updated, and a
+ * negative errno-style value on failure. */
+static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
+        uint32_t combined_mask;
+        int wd, r;
+
+        assert(d);
+        assert(d->fd >= 0);
+
+        combined_mask = inode_data_determine_mask(d);
+
+        /* Nothing to do if we already have a watch and the mask did not change */
+        if (d->wd >= 0 && combined_mask == d->combined_mask)
+                return 0;
+
+        r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
+        if (r < 0)
+                return r;
+
+        /* NOTE(review): this relies on inotify_add_watch_fd() leaving errno set on failure; if the helper
+         * returns a negative errno value itself, 'wd' should be propagated instead. Confirm against the
+         * helper's definition. */
+        wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
+        if (wd < 0)
+                return -errno;
+
+        if (d->wd < 0) {
+                /* First watch for this inode: remember which inode the watch descriptor maps to */
+                r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
+                if (r < 0) {
+                        (void) inotify_rm_watch(d->inotify_data->fd, wd);
+                        return r;
+                }
+
+                d->wd = wd;
+
+        } else if (d->wd != wd) {
+
+                log_debug("Weird, the watch descriptor we already knew for this inode changed?");
+
+                /* Watch descriptors belong to the inotify fd, hence remove the unexpected one from there.
+                 * d->fd is the fd of the watched inode and not an inotify object. */
+                (void) inotify_rm_watch(d->inotify_data->fd, wd);
+                return -EINVAL;
+        }
+
+        d->combined_mask = combined_mask;
+        return 1;
+}
+
+_public_ int sd_event_add_inotify(
+ sd_event *e,
+ sd_event_source **ret,
+ const char *path,
+ uint32_t mask,
+ sd_event_inotify_handler_t callback,
+ void *userdata) {
+ /* Registers an inotify event source for 'path' on event loop 'e'.
+  *
+  * The path is opened with O_PATH and identified by the (st_dev, st_ino) pair reported by fstat(). The
+  * inotify_data object for SD_EVENT_PRIORITY_NORMAL and the inode_data object for that pair are looked up
+  * or created, the new source is linked to the inode_data object, and the watch is realized via
+  * inode_data_realize_watch().
+  *
+  * 'mask' is the inotify mask; IN_MASK_ADD is refused with -EINVAL. IN_ONLYDIR and IN_DONT_FOLLOW are
+  * translated to O_DIRECTORY and O_NOFOLLOW for the open() call, and IN_ONESHOT makes the source start as
+  * SD_EVENT_ONESHOT instead of SD_EVENT_ON. 'callback' and 'userdata' are stored in the new source. If
+  * 'ret' is non-NULL it receives the new source on success.
+  *
+  * Returns 0 on success and a negative errno-style value on failure. */
+ bool rm_inotify = false, rm_inode = false;
+ struct inotify_data *inotify_data = NULL;
+ struct inode_data *inode_data = NULL;
+ _cleanup_close_ int fd = -1;
+ sd_event_source *s;
+ struct stat st;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(e = event_resolve(e), -ENOPKG);
+ assert_return(path, -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
+ * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
+ * the user can't use them for us. */
+ if (mask & IN_MASK_ADD)
+ return -EINVAL;
+
+ fd = open(path, O_PATH|O_CLOEXEC|
+ (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
+ (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
+ if (fd < 0)
+ return -errno;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ s = source_new(e, !ret, SOURCE_INOTIFY); /* NOTE(review): second argument is true when the caller takes no reference; its exact meaning is defined by source_new() - confirm */
+ if (!s)
+ return -ENOMEM;
+
+ s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
+ s->inotify.mask = mask;
+ s->inotify.callback = callback;
+ s->userdata = userdata;
+
+ /* Allocate an inotify object for this priority, and an inode object within it */
+ r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
+ if (r < 0)
+ goto fail;
+ rm_inotify = r > 0; /* on failure, only undo it if this call created it (return value 1) */
+
+ r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
+ if (r < 0)
+ goto fail;
+ rm_inode = r > 0; /* likewise: an already existing inode object is left alone on failure */
+
+ /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
+ * the event source, until then, for which we need the original inode. */
+ if (inode_data->fd < 0) {
+ inode_data->fd = TAKE_FD(fd);
+ LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
+ }
+
+ /* Link our event source to the inode data object */
+ LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
+ s->inotify.inode_data = inode_data;
+
+ rm_inode = rm_inotify = false; /* NOTE(review): presumably source_free() releases the inode/inotify objects through the now linked source from here on - confirm */
+
+ /* Actually realize the watch now */
+ r = inode_data_realize_watch(e, inode_data);
+ if (r < 0)
+ goto fail;
+
+ (void) sd_event_source_set_description(s, path); /* result deliberately ignored */
+
+ if (ret)
+ *ret = s;
+
+ return 0;
+
+fail:
+ source_free(s);
+
+ if (rm_inode)
+ event_free_inode_data(e, inode_data);
+
+ if (rm_inotify)
+ event_free_inotify_data(e, inotify_data);
+
+ return r;
+}
+