chiark / gitweb /
util: replace RUN_WITH_LOCALE with extended locale functions
[elogind.git] / src / shared / util.c
index 97ff320bc8320c82b3d2f44b72f08dfe31eea4f7..8f6d5e660c657356a429d115c17823f3412b2945 100644 (file)
@@ -39,7 +39,6 @@
 #include <linux/tiocl.h>
 #include <termios.h>
 #include <stdarg.h>
-#include <sys/inotify.h>
 #include <sys/poll.h>
 #include <ctype.h>
 #include <sys/prctl.h>
 #include <langinfo.h>
 #include <locale.h>
 #include <sys/personality.h>
+#include <sys/xattr.h>
 #include <libgen.h>
+#include <sys/statvfs.h>
+#include <sys/file.h>
+#include <linux/fs.h>
 #undef basename
 
 #ifdef HAVE_SYS_AUXV_H
@@ -85,6 +88,7 @@
 #include "gunicode.h"
 #include "virt.h"
 #include "def.h"
+#include "sparse-endian.h"
 
 int saved_argc = 0;
 char **saved_argv = NULL;
@@ -503,18 +507,24 @@ int safe_atolli(const char *s, long long int *ret_lli) {
 int safe_atod(const char *s, double *ret_d) {
         char *x = NULL;
         double d = 0;
+        locale_t loc;
 
         assert(s);
         assert(ret_d);
 
-        RUN_WITH_LOCALE(LC_NUMERIC_MASK, "C") {
-                errno = 0;
-                d = strtod(s, &x);
-        }
+        loc = newlocale(LC_NUMERIC_MASK, "C", (locale_t) 0);
+        if (loc == (locale_t) 0)
+                return -errno;
 
-        if (!x || x == s || *x || errno)
+        errno = 0;
+        d = strtod_l(s, &x, loc);
+
+        if (!x || x == s || *x || errno) {
+                freelocale(loc);
                 return errno ? -errno : -EINVAL;
+        }
 
+        freelocale(loc);
         *ret_d = (double) d;
         return 0;
 }
@@ -1151,7 +1161,7 @@ char *delete_chars(char *s, const char *bad) {
 }
 
 char *file_in_same_dir(const char *path, const char *filename) {
-        char *e, *r;
+        char *e, *ret;
         size_t k;
 
         assert(path);
@@ -1164,17 +1174,17 @@ char *file_in_same_dir(const char *path, const char *filename) {
         if (path_is_absolute(filename))
                 return strdup(filename);
 
-        if (!(e = strrchr(path, '/')))
+        e = strrchr(path, '/');
+        if (!e)
                 return strdup(filename);
 
         k = strlen(filename);
-        if (!(r = new(char, e-path+1+k+1)))
+        ret = new(char, (e + 1 - path) + k + 1);
+        if (!ret)
                 return NULL;
 
-        memcpy(r, path, e-path+1);
-        memcpy(r+(e-path)+1, filename, k+1);
-
-        return r;
+        memcpy(mempcpy(ret, path, e + 1 - path), filename, k + 1);
+        return ret;
 }
 
 int rmdir_parents(const char *path, const char *stop) {
@@ -1349,12 +1359,19 @@ char *cunescape_length_with_prefix(const char *s, size_t length, const char *pre
                 memcpy(r, prefix, pl);
 
         for (f = s, t = r + pl; f < s + length; f++) {
+                size_t remaining = s + length - f;
+                assert(remaining > 0);
 
-                if (*f != '\\') {
+                if (*f != '\\') {        /* a literal literal */
                         *(t++) = *f;
                         continue;
                 }
 
+                if (--remaining == 0) {  /* copy trailing backslash verbatim */
+                        *(t++) = *f;
+                        break;
+                }
+
                 f++;
 
                 switch (*f) {
@@ -1397,10 +1414,12 @@ char *cunescape_length_with_prefix(const char *s, size_t length, const char *pre
 
                 case 'x': {
                         /* hexadecimal encoding */
-                        int a, b;
+                        int a = -1, b = -1;
 
-                        a = unhexchar(f[1]);
-                        b = unhexchar(f[2]);
+                        if (remaining >= 2) {
+                                a = unhexchar(f[1]);
+                                b = unhexchar(f[2]);
+                        }
 
                         if (a < 0 || b < 0 || (a == 0 && b == 0)) {
                                 /* Invalid escape code, let's take it literal then */
@@ -1423,11 +1442,13 @@ char *cunescape_length_with_prefix(const char *s, size_t length, const char *pre
                 case '6':
                 case '7': {
                         /* octal encoding */
-                        int a, b, c;
+                        int a = -1, b = -1, c = -1;
 
-                        a = unoctchar(f[0]);
-                        b = unoctchar(f[1]);
-                        c = unoctchar(f[2]);
+                        if (remaining >= 3) {
+                                a = unoctchar(f[0]);
+                                b = unoctchar(f[1]);
+                                c = unoctchar(f[2]);
+                        }
 
                         if (a < 0 || b < 0 || c < 0 || (a == 0 && b == 0 && c == 0)) {
                                 /* Invalid escape code, let's take it literal then */
@@ -1441,11 +1462,6 @@ char *cunescape_length_with_prefix(const char *s, size_t length, const char *pre
                         break;
                 }
 
-                case 0:
-                        /* premature end of string. */
-                        *(t++) = '\\';
-                        goto finish;
-
                 default:
                         /* Invalid escape code, let's take it literal then */
                         *(t++) = '\\';
@@ -1454,7 +1470,6 @@ char *cunescape_length_with_prefix(const char *s, size_t length, const char *pre
                 }
         }
 
-finish:
         *t = 0;
         return r;
 }
@@ -2106,7 +2121,7 @@ int acquire_terminal(
                 assert(notify >= 0);
 
                 for (;;) {
-                        uint8_t buffer[INOTIFY_EVENT_MAX] _alignas_(struct inotify_event);
+                        union inotify_event_buffer buffer;
                         struct inotify_event *e;
                         ssize_t l;
 
@@ -2129,7 +2144,7 @@ int acquire_terminal(
                                 }
                         }
 
-                        l = read(notify, buffer, sizeof(buffer));
+                        l = read(notify, &buffer, sizeof(buffer));
                         if (l < 0) {
                                 if (errno == EINTR || errno == EAGAIN)
                                         continue;
@@ -3455,7 +3470,7 @@ unsigned columns(void) {
                 c = 80;
 
         cached_columns = c;
-        return c;
+        return cached_columns;
 }
 
 int fd_lines(int fd) {
@@ -3472,7 +3487,7 @@ int fd_lines(int fd) {
 
 unsigned lines(void) {
         const char *e;
-        unsigned l;
+        int l;
 
         if (_likely_(cached_lines > 0))
                 return cached_lines;
@@ -3480,7 +3495,7 @@ unsigned lines(void) {
         l = 0;
         e = getenv("LINES");
         if (e)
-                (void) safe_atou(e, &l);
+                (void) safe_atoi(e, &l);
 
         if (l <= 0)
                 l = fd_lines(STDOUT_FILENO);
@@ -4003,7 +4018,7 @@ bool tty_is_vc_resolve(const char *tty) {
 const char *default_term_for_tty(const char *tty) {
         assert(tty);
 
-        return tty_is_vc_resolve(tty) ? "TERM=linux" : "TERM=vt102";
+        return tty_is_vc_resolve(tty) ? "TERM=linux" : "TERM=vt220";
 }
 
 bool dirent_is_file(const struct dirent *de) {
@@ -4034,64 +4049,66 @@ bool dirent_is_file_with_suffix(const struct dirent *de, const char *suffix) {
         return endswith(de->d_name, suffix);
 }
 
-void execute_directory(const char *directory, DIR *d, usec_t timeout, char *argv[]) {
-        pid_t executor_pid;
-        int r;
-
-        assert(directory);
+static int do_execute(char **directories, usec_t timeout, char *argv[]) {
+        _cleanup_hashmap_free_free_ Hashmap *pids = NULL;
+        _cleanup_set_free_free_ Set *seen = NULL;
+        char **directory;
 
-        /* Executes all binaries in a directory in parallel and waits
-         * for them to finish. Optionally a timeout is applied. */
+        /* We fork this all off from a child process so that we can
+         * somewhat cleanly make use of SIGALRM to set a time limit */
 
-        executor_pid = fork();
-        if (executor_pid < 0) {
-                log_error_errno(errno, "Failed to fork: %m");
-                return;
+        reset_all_signal_handlers();
+        reset_signal_mask();
 
-        } else if (executor_pid == 0) {
-                _cleanup_hashmap_free_free_ Hashmap *pids = NULL;
-                _cleanup_closedir_ DIR *_d = NULL;
-                struct dirent *de;
+        assert_se(prctl(PR_SET_PDEATHSIG, SIGTERM) == 0);
 
-                /* We fork this all off from a child process so that
-                 * we can somewhat cleanly make use of SIGALRM to set
-                 * a time limit */
+        pids = hashmap_new(NULL);
+        if (!pids)
+                return log_oom();
 
-                reset_all_signal_handlers();
-                reset_signal_mask();
+        seen = set_new(&string_hash_ops);
+        if (!seen)
+                return log_oom();
 
-                assert_se(prctl(PR_SET_PDEATHSIG, SIGTERM) == 0);
+        STRV_FOREACH(directory, directories) {
+                _cleanup_closedir_ DIR *d;
+                struct dirent *de;
 
+                d = opendir(*directory);
                 if (!d) {
-                        d = _d = opendir(directory);
-                        if (!d) {
-                                if (errno == ENOENT)
-                                        _exit(EXIT_SUCCESS);
+                        if (errno == ENOENT)
+                                continue;
 
-                                log_error_errno(errno, "Failed to enumerate directory %s: %m", directory);
-                                _exit(EXIT_FAILURE);
-                        }
-                }
-
-                pids = hashmap_new(NULL);
-                if (!pids) {
-                        log_oom();
-                        _exit(EXIT_FAILURE);
+                        return log_error_errno(errno, "Failed to open directory %s: %m", *directory);
                 }
 
                 FOREACH_DIRENT(de, d, break) {
                         _cleanup_free_ char *path = NULL;
                         pid_t pid;
+                        int r;
 
                         if (!dirent_is_file(de))
                                 continue;
 
-                        path = strjoin(directory, "/", de->d_name, NULL);
-                        if (!path) {
-                                log_oom();
-                                _exit(EXIT_FAILURE);
+                        if (set_contains(seen, de->d_name)) {
+                                log_debug("%1$s/%2$s skipped (%2$s was already seen).", *directory, de->d_name);
+                                continue;
                         }
 
+                        r = set_put_strdup(seen, de->d_name);
+                        if (r < 0)
+                                return log_oom();
+
+                        path = strjoin(*directory, "/", de->d_name, NULL);
+                        if (!path)
+                                return log_oom();
+
+                        if (null_or_empty_path(path)) {
+                                log_debug("%s is empty (a mask).", path);
+                                continue;
+                        } else
+                                log_debug("%s will be executed.", path);
+
                         pid = fork();
                         if (pid < 0) {
                                 log_error_errno(errno, "Failed to fork: %m");
@@ -4109,45 +4126,68 @@ void execute_directory(const char *directory, DIR *d, usec_t timeout, char *argv
                                         argv[0] = path;
 
                                 execv(path, argv);
-                                log_error_errno(errno, "Failed to execute %s: %m", path);
-                                _exit(EXIT_FAILURE);
+                                return log_error_errno(errno, "Failed to execute %s: %m", path);
                         }
 
                         log_debug("Spawned %s as " PID_FMT ".", path, pid);
 
                         r = hashmap_put(pids, UINT_TO_PTR(pid), path);
-                        if (r < 0) {
-                                log_oom();
-                                _exit(EXIT_FAILURE);
-                        }
-
+                        if (r < 0)
+                                return log_oom();
                         path = NULL;
                 }
+        }
+
+        /* Abort execution of this process after the timout. We simply
+         * rely on SIGALRM as default action terminating the process,
+         * and turn on alarm(). */
 
-                /* Abort execution of this process after the
-                 * timout. We simply rely on SIGALRM as default action
-                 * terminating the process, and turn on alarm(). */
+        if (timeout != USEC_INFINITY)
+                alarm((timeout + USEC_PER_SEC - 1) / USEC_PER_SEC);
 
-                if (timeout != USEC_INFINITY)
-                        alarm((timeout + USEC_PER_SEC - 1) / USEC_PER_SEC);
+        while (!hashmap_isempty(pids)) {
+                _cleanup_free_ char *path = NULL;
+                pid_t pid;
 
-                while (!hashmap_isempty(pids)) {
-                        _cleanup_free_ char *path = NULL;
-                        pid_t pid;
+                pid = PTR_TO_UINT(hashmap_first_key(pids));
+                assert(pid > 0);
 
-                        pid = PTR_TO_UINT(hashmap_first_key(pids));
-                        assert(pid > 0);
+                path = hashmap_remove(pids, UINT_TO_PTR(pid));
+                assert(path);
 
-                        path = hashmap_remove(pids, UINT_TO_PTR(pid));
-                        assert(path);
+                wait_for_terminate_and_warn(path, pid, true);
+        }
 
-                        wait_for_terminate_and_warn(path, pid, true);
-                }
+        return 0;
+}
 
-                _exit(EXIT_SUCCESS);
+void execute_directories(const char* const* directories, usec_t timeout, char *argv[]) {
+        pid_t executor_pid;
+        int r;
+        char *name;
+        char **dirs = (char**) directories;
+
+        assert(!strv_isempty(dirs));
+
+        name = basename(dirs[0]);
+        assert(!isempty(name));
+
+        /* Executes all binaries in the directories in parallel and waits
+         * for them to finish. Optionally a timeout is applied. If a file
+         * with the same name exists in more than one directory, the
+         * earliest one wins. */
+
+        executor_pid = fork();
+        if (executor_pid < 0) {
+                log_error_errno(errno, "Failed to fork: %m");
+                return;
+
+        } else if (executor_pid == 0) {
+                r = do_execute(dirs, timeout, argv);
+                _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
         }
 
-        wait_for_terminate_and_warn(directory, executor_pid, true);
+        wait_for_terminate_and_warn(name, executor_pid, true);
 }
 
 int kill_and_sigcont(pid_t pid, int sig) {
@@ -4270,23 +4310,6 @@ bool machine_name_is_valid(const char *s) {
         return true;
 }
 
-bool image_name_is_valid(const char *s) {
-        if (!filename_is_valid(s))
-                return false;
-
-        if (string_has_cc(s, NULL))
-                return false;
-
-        if (!utf8_is_valid(s))
-                return false;
-
-        /* Temporary files for atomically creating new files */
-        if (startswith(s, ".#"))
-                return false;
-
-        return true;
-}
-
 int pipe_eof(int fd) {
         struct pollfd pollfd = {
                 .fd = fd,
@@ -5183,6 +5206,9 @@ char *format_bytes(char *buf, size_t l, off_t t) {
                 { "K", 1024ULL },
         };
 
+        if (t == (off_t) -1)
+                return NULL;
+
         for (i = 0; i < ELEMENTSOF(table); i++) {
 
                 if (t >= table[i].factor) {
@@ -6713,23 +6739,6 @@ uint64_t physical_memory(void) {
         return (uint64_t) mem * (uint64_t) page_size();
 }
 
-char* mount_test_option(const char *haystack, const char *needle) {
-
-        struct mntent me = {
-                .mnt_opts = (char*) haystack
-        };
-
-        assert(needle);
-
-        /* Like glibc's hasmntopt(), but works on a string, not a
-         * struct mntent */
-
-        if (!haystack)
-                return NULL;
-
-        return hasmntopt(&me, needle);
-}
-
 void hexdump(FILE *f, const void *p, size_t s) {
         const uint8_t *b = p;
         unsigned n = 0;
@@ -6854,6 +6863,15 @@ int umount_recursive(const char *prefix, int flags) {
         return r ? r : n;
 }
 
+static int get_mount_flags(const char *path, unsigned long *flags) {
+        struct statvfs buf;
+
+        if (statvfs(path, &buf) < 0)
+                return -errno;
+        *flags = buf.f_flag;
+        return 0;
+}
+
 int bind_remount_recursive(const char *prefix, bool ro) {
         _cleanup_set_free_free_ Set *done = NULL;
         _cleanup_free_ char *cleaned = NULL;
@@ -6888,6 +6906,7 @@ int bind_remount_recursive(const char *prefix, bool ro) {
                 _cleanup_set_free_free_ Set *todo = NULL;
                 bool top_autofs = false;
                 char *x;
+                unsigned long orig_flags;
 
                 todo = set_new(&string_hash_ops);
                 if (!todo)
@@ -6965,7 +6984,11 @@ int bind_remount_recursive(const char *prefix, bool ro) {
                         if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
                                 return -errno;
 
-                        if (mount(NULL, prefix, NULL, MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
+                        orig_flags = 0;
+                        (void) get_mount_flags(cleaned, &orig_flags);
+                        orig_flags &= ~MS_RDONLY;
+
+                        if (mount(NULL, prefix, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
                                 return -errno;
 
                         x = strdup(cleaned);
@@ -6985,7 +7008,14 @@ int bind_remount_recursive(const char *prefix, bool ro) {
                         if (r < 0)
                                 return r;
 
-                        if (mount(NULL, x, NULL, MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) {
+                        /* Try to reuse the original flag set, but
+                         * don't care for errors, in case of
+                         * obstructed mounts */
+                        orig_flags = 0;
+                        (void) get_mount_flags(x, &orig_flags);
+                        orig_flags &= ~MS_RDONLY;
+
+                        if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) {
 
                                 /* Deal with mount points that are
                                  * obstructed by a later mount */
@@ -7559,3 +7589,345 @@ int openpt_in_namespace(pid_t pid, int flags) {
 
         return -EIO;
 }
+
+ssize_t fgetxattrat_fake(int dirfd, const char *filename, const char *attribute, void *value, size_t size, int flags) {
+        _cleanup_close_ int fd = -1;
+        ssize_t l;
+
+        /* The kernel doesn't have a fgetxattrat() command, hence let's emulate one */
+
+        fd = openat(dirfd, filename, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOATIME|(flags & AT_SYMLINK_NOFOLLOW ? O_NOFOLLOW : 0));
+        if (fd < 0)
+                return -errno;
+
+        l = fgetxattr(fd, attribute, value, size);
+        if (l < 0)
+                return -errno;
+
+        return l;
+}
+
+static int parse_crtime(le64_t le, usec_t *usec) {
+        uint64_t u;
+
+        assert(usec);
+
+        u = le64toh(le);
+        if (u == 0 || u == (uint64_t) -1)
+                return -EIO;
+
+        *usec = (usec_t) u;
+        return 0;
+}
+
+int fd_getcrtime(int fd, usec_t *usec) {
+        le64_t le;
+        ssize_t n;
+
+        assert(fd >= 0);
+        assert(usec);
+
+        /* Until Linux gets a real concept of birthtime/creation time,
+         * let's fake one with xattrs */
+
+        n = fgetxattr(fd, "user.crtime_usec", &le, sizeof(le));
+        if (n < 0)
+                return -errno;
+        if (n != sizeof(le))
+                return -EIO;
+
+        return parse_crtime(le, usec);
+}
+
+int fd_getcrtime_at(int dirfd, const char *name, usec_t *usec, int flags) {
+        le64_t le;
+        ssize_t n;
+
+        n = fgetxattrat_fake(dirfd, name, "user.crtime_usec", &le, sizeof(le), flags);
+        if (n < 0)
+                return -errno;
+        if (n != sizeof(le))
+                return -EIO;
+
+        return parse_crtime(le, usec);
+}
+
+int path_getcrtime(const char *p, usec_t *usec) {
+        le64_t le;
+        ssize_t n;
+
+        assert(p);
+        assert(usec);
+
+        n = getxattr(p, "user.crtime_usec", &le, sizeof(le));
+        if (n < 0)
+                return -errno;
+        if (n != sizeof(le))
+                return -EIO;
+
+        return parse_crtime(le, usec);
+}
+
+int fd_setcrtime(int fd, usec_t usec) {
+        le64_t le;
+
+        assert(fd >= 0);
+
+        if (usec <= 0)
+                usec = now(CLOCK_REALTIME);
+
+        le = htole64((uint64_t) usec);
+        if (fsetxattr(fd, "user.crtime_usec", &le, sizeof(le), 0) < 0)
+                return -errno;
+
+        return 0;
+}
+
+int same_fd(int a, int b) {
+        struct stat sta, stb;
+        pid_t pid;
+        int r, fa, fb;
+
+        assert(a >= 0);
+        assert(b >= 0);
+
+        /* Compares two file descriptors. Note that semantics are
+         * quite different depending on whether we have kcmp() or we
+         * don't. If we have kcmp() this will only return true for
+         * dup()ed file descriptors, but not otherwise. If we don't
+         * have kcmp() this will also return true for two fds of the same
+         * file, created by separate open() calls. Since we use this
+         * call mostly for filtering out duplicates in the fd store
+         * this difference hopefully doesn't matter too much. */
+
+        if (a == b)
+                return true;
+
+        /* Try to use kcmp() if we have it. */
+        pid = getpid();
+        r = kcmp(pid, pid, KCMP_FILE, a, b);
+        if (r == 0)
+                return true;
+        if (r > 0)
+                return false;
+        if (errno != ENOSYS)
+                return -errno;
+
+        /* We don't have kcmp(), use fstat() instead. */
+        if (fstat(a, &sta) < 0)
+                return -errno;
+
+        if (fstat(b, &stb) < 0)
+                return -errno;
+
+        if ((sta.st_mode & S_IFMT) != (stb.st_mode & S_IFMT))
+                return false;
+
+        /* We consider all device fds different, since two device fds
+         * might refer to quite different device contexts even though
+         * they share the same inode and backing dev_t. */
+
+        if (S_ISCHR(sta.st_mode) || S_ISBLK(sta.st_mode))
+                return false;
+
+        if (sta.st_dev != stb.st_dev || sta.st_ino != stb.st_ino)
+                return false;
+
+        /* The fds refer to the same inode on disk, let's also check
+         * if they have the same fd flags. This is useful to
+         * distuingish the read and write side of a pipe created with
+         * pipe(). */
+        fa = fcntl(a, F_GETFL);
+        if (fa < 0)
+                return -errno;
+
+        fb = fcntl(b, F_GETFL);
+        if (fb < 0)
+                return -errno;
+
+        return fa == fb;
+}
+
+int chattr_fd(int fd, bool b, unsigned mask) {
+        unsigned old_attr, new_attr;
+
+        assert(fd >= 0);
+
+        if (mask == 0)
+                return 0;
+
+        if (ioctl(fd, FS_IOC_GETFLAGS, &old_attr) < 0)
+                return -errno;
+
+        if (b)
+                new_attr = old_attr | mask;
+        else
+                new_attr = old_attr & ~mask;
+
+        if (new_attr == old_attr)
+                return 0;
+
+        if (ioctl(fd, FS_IOC_SETFLAGS, &new_attr) < 0)
+                return -errno;
+
+        return 0;
+}
+
+int chattr_path(const char *p, bool b, unsigned mask) {
+        _cleanup_close_ int fd = -1;
+
+        assert(p);
+
+        if (mask == 0)
+                return 0;
+
+        fd = open(p, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+        if (fd < 0)
+                return -errno;
+
+        return chattr_fd(fd, b, mask);
+}
+
+int read_attr_fd(int fd, unsigned *ret) {
+        assert(fd >= 0);
+
+        if (ioctl(fd, FS_IOC_GETFLAGS, ret) < 0)
+                return -errno;
+
+        return 0;
+}
+
+int read_attr_path(const char *p, unsigned *ret) {
+        _cleanup_close_ int fd = -1;
+
+        assert(p);
+        assert(ret);
+
+        fd = open(p, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
+        if (fd < 0)
+                return -errno;
+
+        return read_attr_fd(fd, ret);
+}
+
+int make_lock_file(const char *p, int operation, LockFile *ret) {
+        _cleanup_close_ int fd = -1;
+        _cleanup_free_ char *t = NULL;
+        int r;
+
+        /*
+         * We use UNPOSIX locks if they are available. They have nice
+         * semantics, and are mostly compatible with NFS. However,
+         * they are only available on new kernels. When we detect we
+         * are running on an older kernel, then we fall back to good
+         * old BSD locks. They also have nice semantics, but are
+         * slightly problematic on NFS, where they are upgraded to
+         * POSIX locks, even though locally they are orthogonal to
+         * POSIX locks.
+         */
+
+        t = strdup(p);
+        if (!t)
+                return -ENOMEM;
+
+        for (;;) {
+                struct flock fl = {
+                        .l_type = (operation & ~LOCK_NB) == LOCK_EX ? F_WRLCK : F_RDLCK,
+                        .l_whence = SEEK_SET,
+                };
+                struct stat st;
+
+                fd = open(p, O_CREAT|O_RDWR|O_NOFOLLOW|O_CLOEXEC|O_NOCTTY, 0600);
+                if (fd < 0)
+                        return -errno;
+
+                r = fcntl(fd, (operation & LOCK_NB) ? F_OFD_SETLK : F_OFD_SETLKW, &fl);
+                if (r < 0) {
+
+                        /* If the kernel is too old, use good old BSD locks */
+                        if (errno == EINVAL)
+                                r = flock(fd, operation);
+
+                        if (r < 0)
+                                return errno == EAGAIN ? -EBUSY : -errno;
+                }
+
+                /* If we acquired the lock, let's check if the file
+                 * still exists in the file system. If not, then the
+                 * previous exclusive owner removed it and then closed
+                 * it. In such a case our acquired lock is worthless,
+                 * hence try again. */
+
+                r = fstat(fd, &st);
+                if (r < 0)
+                        return -errno;
+                if (st.st_nlink > 0)
+                        break;
+
+                fd = safe_close(fd);
+        }
+
+        ret->path = t;
+        ret->fd = fd;
+        ret->operation = operation;
+
+        fd = -1;
+        t = NULL;
+
+        return r;
+}
+
+int make_lock_file_for(const char *p, int operation, LockFile *ret) {
+        const char *fn;
+        char *t;
+
+        assert(p);
+        assert(ret);
+
+        fn = basename(p);
+        if (!filename_is_valid(fn))
+                return -EINVAL;
+
+        t = newa(char, strlen(p) + 2 + 4 + 1);
+        stpcpy(stpcpy(stpcpy(mempcpy(t, p, fn - p), ".#"), fn), ".lck");
+
+        return make_lock_file(t, operation, ret);
+}
+
+void release_lock_file(LockFile *f) {
+        int r;
+
+        if (!f)
+                return;
+
+        if (f->path) {
+
+                /* If we are the exclusive owner we can safely delete
+                 * the lock file itself. If we are not the exclusive
+                 * owner, we can try becoming it. */
+
+                if (f->fd >= 0 &&
+                    (f->operation & ~LOCK_NB) == LOCK_SH) {
+                        static const struct flock fl = {
+                                .l_type = F_WRLCK,
+                                .l_whence = SEEK_SET,
+                        };
+
+                        r = fcntl(f->fd, F_OFD_SETLK, &fl);
+                        if (r < 0 && errno == EINVAL)
+                                r = flock(f->fd, LOCK_EX|LOCK_NB);
+
+                        if (r >= 0)
+                                f->operation = LOCK_EX|LOCK_NB;
+                }
+
+                if ((f->operation & ~LOCK_NB) == LOCK_EX)
+                        unlink_noerrno(f->path);
+
+                free(f->path);
+                f->path = NULL;
+        }
+
+        f->fd = safe_close(f->fd);
+        f->operation = 0;
+}