chiark / gitweb /
util: fix unicode decoding in unquote_first_word()
[elogind.git] / src / shared / util.c
index 392c42ba2be4280ba0284f0ccbe6449a75ad1c1c..3561573e16da8fe190be18da21defd53e53a01f6 100644 (file)
@@ -25,7 +25,6 @@
 #include <stdlib.h>
 #include <signal.h>
 #include <libintl.h>
-#include <locale.h>
 #include <stdio.h>
 #include <syslog.h>
 #include <sched.h>
@@ -1354,7 +1353,8 @@ char *cescape(const char *s) {
 
         assert(s);
 
-        /* Does C style string escaping. */
+        /* Does C style string escaping. May be reversed with
+         * cunescape(). */
 
         r = new(char, strlen(s)*4 + 1);
         if (!r)
@@ -1368,13 +1368,17 @@ char *cescape(const char *s) {
         return r;
 }
 
-static int cunescape_one(const char *p, size_t length, char *ret) {
+static int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode) {
         int r = 1;
 
         assert(p);
         assert(*p);
         assert(ret);
 
+        /* Unescapes C style. Returns the unescaped character in ret,
+         * unless we encountered a \u sequence in which case the full
+         * unicode character is returned in ret_unicode, instead. */
+
         if (length != (size_t) -1 && length < 1)
                 return -EINVAL;
 
@@ -1431,15 +1435,92 @@ static int cunescape_one(const char *p, size_t length, char *ret) {
                 if (b < 0)
                         return -EINVAL;
 
-                /* don't allow NUL bytes */
+                /* Don't allow NUL bytes */
                 if (a == 0 && b == 0)
                         return -EINVAL;
 
-                *ret = (char) ((a << 4) | b);
+                *ret = (char) ((a << 4U) | b);
                 r = 3;
                 break;
         }
 
+        case 'u': {
+                /* C++11 style 16bit unicode */
+
+                int a[4];
+                unsigned i;
+                uint32_t c;
+
+                if (length != (size_t) -1 && length < 5)
+                        return -EINVAL;
+
+                for (i = 0; i < 4; i++) {
+                        a[i] = unhexchar(p[1 + i]);
+                        if (a[i] < 0)
+                                return a[i];
+                }
+
+                c = ((uint32_t) a[0] << 12U) | ((uint32_t) a[1] << 8U) | ((uint32_t) a[2] << 4U) | (uint32_t) a[3];
+
+                /* Don't allow 0 chars */
+                if (c == 0)
+                        return -EINVAL;
+
+                if (c < 128)
+                        *ret = c;
+                else {
+                        if (!ret_unicode)
+                                return -EINVAL;
+
+                        *ret = 0;
+                        *ret_unicode = c;
+                }
+
+                r = 5;
+                break;
+        }
+
+        case 'U': {
+                /* C++11 style 32bit unicode */
+
+                int a[8];
+                unsigned i;
+                uint32_t c;
+
+                if (length != (size_t) -1 && length < 9)
+                        return -EINVAL;
+
+                for (i = 0; i < 8; i++) {
+                        a[i] = unhexchar(p[1 + i]);
+                        if (a[i] < 0)
+                                return a[i];
+                }
+
+                c = ((uint32_t) a[0] << 28U) | ((uint32_t) a[1] << 24U) | ((uint32_t) a[2] << 20U) | ((uint32_t) a[3] << 16U) |
+                    ((uint32_t) a[4] << 12U) | ((uint32_t) a[5] <<  8U) | ((uint32_t) a[6] <<  4U) |  (uint32_t) a[7];
+
+                /* Don't allow 0 chars */
+                if (c == 0)
+                        return -EINVAL;
+
+                /* Don't allow invalid code points */
+                if (!unichar_is_valid(c))
+                        return -EINVAL;
+
+                if (c < 128)
+                        *ret = c;
+                else {
+                        if (!ret_unicode)
+                                return -EINVAL;
+
+                        *ret = 0;
+                        *ret_unicode = c;
+                }
+
+                r = 9;
+                break;
+        }
+
         case '0':
         case '1':
         case '2':
@@ -1449,7 +1530,8 @@ static int cunescape_one(const char *p, size_t length, char *ret) {
         case '6':
         case '7': {
                 /* octal encoding */
-                int a, b, c, m;
+                int a, b, c;
+                uint32_t m;
 
                 if (length != (size_t) -1 && length < 4)
                         return -EINVAL;
@@ -1471,11 +1553,11 @@ static int cunescape_one(const char *p, size_t length, char *ret) {
                         return -EINVAL;
 
                 /* Don't allow bytes above 255 */
-                m = (a << 6) | (b << 3) | c;
+                m = ((uint32_t) a << 6U) | ((uint32_t) b << 3U) | (uint32_t) c;
                 if (m > 255)
                         return -EINVAL;
 
-                *ret = (char) m;
+                *ret = m;
                 r = 3;
                 break;
         }
@@ -1487,12 +1569,13 @@ static int cunescape_one(const char *p, size_t length, char *ret) {
         return r;
 }
 
-char *cunescape_length_with_prefix(const char *s, size_t length, const char *prefix) {
+int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) {
         char *r, *t;
         const char *f;
         size_t pl;
 
         assert(s);
+        assert(ret);
 
         /* Undoes C style string escaping, and optionally prefixes it. */
 
@@ -1500,47 +1583,71 @@ char *cunescape_length_with_prefix(const char *s, size_t length, const char *pre
 
         r = new(char, pl+length+1);
         if (!r)
-                return NULL;
+                return -ENOMEM;
 
         if (prefix)
                 memcpy(r, prefix, pl);
 
         for (f = s, t = r + pl; f < s + length; f++) {
                 size_t remaining;
+                uint32_t u;
+                char c;
                 int k;
 
                 remaining = s + length - f;
                 assert(remaining > 0);
 
-                if (*f != '\\' || remaining == 1) {
-                        /* a literal literal, or a trailing backslash, copy verbatim */
+                if (*f != '\\') {
+                        /* A literal literal, copy verbatim */
                         *(t++) = *f;
                         continue;
                 }
 
-                k = cunescape_one(f + 1, remaining - 1, t);
+                if (remaining == 1) {
+                        if (flags & UNESCAPE_RELAX) {
+                                /* A trailing backslash, copy verbatim */
+                                *(t++) = *f;
+                                continue;
+                        }
+
+                        free(r);
+                        return -EINVAL;
+                }
+
+                k = cunescape_one(f + 1, remaining - 1, &c, &u);
                 if (k < 0) {
-                        /* Invalid escape code, let's take it literal then */
-                        *(t++) = '\\';
-                        continue;
+                        if (flags & UNESCAPE_RELAX) {
+                                /* Invalid escape code, let's take it literal then */
+                                *(t++) = '\\';
+                                continue;
+                        }
+
+                        free(r);
+                        return k;
                 }
 
+                if (c != 0)
+                        /* Non-Unicode? Let's encode this directly */
+                        *(t++) = c;
+                else
+                        /* Unicode? Then let's encode this in UTF-8 */
+                        t += utf8_encode_unichar(t, u);
+
                 f += k;
-                t++;
         }
 
         *t = 0;
-        return r;
-}
 
-char *cunescape_length(const char *s, size_t length) {
-        return cunescape_length_with_prefix(s, length, NULL);
+        *ret = r;
+        return t - r;
 }
 
-char *cunescape(const char *s) {
-        assert(s);
+int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) { /* Same as cunescape_length_with_prefix() with a NULL prefix; its return value is passed through unchanged */
+        return cunescape_length_with_prefix(s, length, NULL, flags, ret);
+}
 
-        return cunescape_length(s, strlen(s));
+int cunescape(const char *s, UnescapeFlags flags, char **ret) { /* Unescapes all of the NUL-terminated string s; s must not be NULL since it is handed to strlen() */
+        return cunescape_length(s, strlen(s), flags, ret);
 }
 
 char *xescape(const char *s, const char *bad) {
@@ -1549,7 +1656,7 @@ char *xescape(const char *s, const char *bad) {
 
         /* Escapes all chars in bad, in addition to \ and all special
          * chars, in \xFF style escaping. May be reversed with
-         * cunescape. */
+         * cunescape(). */
 
         r = new(char, strlen(s) * 4 + 1);
         if (!r)
@@ -3009,101 +3116,14 @@ int get_ctty(pid_t pid, dev_t *_devnr, char **r) {
         return 0;
 }
 
-int rm_rf_children_dangerous(int fd, bool only_dirs, bool honour_sticky, struct stat *root_dev) {
-        _cleanup_closedir_ DIR *d = NULL;
-        int ret = 0;
-
-        assert(fd >= 0);
-
-        /* This returns the first error we run into, but nevertheless
-         * tries to go on. This closes the passed fd. */
-
-        d = fdopendir(fd);
-        if (!d) {
-                safe_close(fd);
-
-                return errno == ENOENT ? 0 : -errno;
-        }
-
-        for (;;) {
-                struct dirent *de;
-                bool is_dir, keep_around;
-                struct stat st;
-                int r;
-
-                errno = 0;
-                de = readdir(d);
-                if (!de) {
-                        if (errno != 0 && ret == 0)
-                                ret = -errno;
-                        return ret;
-                }
-
-                if (streq(de->d_name, ".") || streq(de->d_name, ".."))
-                        continue;
-
-                if (de->d_type == DT_UNKNOWN ||
-                    honour_sticky ||
-                    (de->d_type == DT_DIR && root_dev)) {
-                        if (fstatat(fd, de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
-                                if (ret == 0 && errno != ENOENT)
-                                        ret = -errno;
-                                continue;
-                        }
-
-                        is_dir = S_ISDIR(st.st_mode);
-                        keep_around =
-                                honour_sticky &&
-                                (st.st_uid == 0 || st.st_uid == getuid()) &&
-                                (st.st_mode & S_ISVTX);
-                } else {
-                        is_dir = de->d_type == DT_DIR;
-                        keep_around = false;
-                }
-
-                if (is_dir) {
-                        int subdir_fd;
-
-                        /* if root_dev is set, remove subdirectories only, if device is same as dir */
-                        if (root_dev && st.st_dev != root_dev->st_dev)
-                                continue;
-
-                        subdir_fd = openat(fd, de->d_name,
-                                           O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME);
-                        if (subdir_fd < 0) {
-                                if (ret == 0 && errno != ENOENT)
-                                        ret = -errno;
-                                continue;
-                        }
-
-                        r = rm_rf_children_dangerous(subdir_fd, only_dirs, honour_sticky, root_dev);
-                        if (r < 0 && ret == 0)
-                                ret = r;
-
-                        if (!keep_around)
-                                if (unlinkat(fd, de->d_name, AT_REMOVEDIR) < 0) {
-                                        if (ret == 0 && errno != ENOENT)
-                                                ret = -errno;
-                                }
-
-                } else if (!only_dirs && !keep_around) {
-
-                        if (unlinkat(fd, de->d_name, 0) < 0) {
-                                if (ret == 0 && errno != ENOENT)
-                                        ret = -errno;
-                        }
-                }
-        }
-}
-
-_pure_ static int is_temporary_fs(struct statfs *s) {
+bool is_temporary_fs(const struct statfs *s) { /* True if s->f_type compares equal (via F_TYPE_EQUAL) to TMPFS_MAGIC or RAMFS_MAGIC */
         assert(s);
 
         return F_TYPE_EQUAL(s->f_type, TMPFS_MAGIC) ||
                F_TYPE_EQUAL(s->f_type, RAMFS_MAGIC);
 }
 
-int is_fd_on_temporary_fs(int fd) {
+int fd_is_temporary_fs(int fd) {
         struct statfs s;
 
         if (fstatfs(fd, &s) < 0)
@@ -3112,114 +3132,6 @@ int is_fd_on_temporary_fs(int fd) {
         return is_temporary_fs(&s);
 }
 
-int rm_rf_children(int fd, bool only_dirs, bool honour_sticky, struct stat *root_dev) {
-        struct statfs s;
-
-        assert(fd >= 0);
-
-        if (fstatfs(fd, &s) < 0) {
-                safe_close(fd);
-                return -errno;
-        }
-
-        /* We refuse to clean disk file systems with this call. This
-         * is extra paranoia just to be sure we never ever remove
-         * non-state data */
-        if (!is_temporary_fs(&s)) {
-                log_error("Attempted to remove disk file system, and we can't allow that.");
-                safe_close(fd);
-                return -EPERM;
-        }
-
-        return rm_rf_children_dangerous(fd, only_dirs, honour_sticky, root_dev);
-}
-
-static int file_is_priv_sticky(const char *p) {
-        struct stat st;
-
-        assert(p);
-
-        if (lstat(p, &st) < 0)
-                return -errno;
-
-        return
-                (st.st_uid == 0 || st.st_uid == getuid()) &&
-                (st.st_mode & S_ISVTX);
-}
-
-static int rm_rf_internal(const char *path, bool only_dirs, bool delete_root, bool honour_sticky, bool dangerous) {
-        int fd, r;
-        struct statfs s;
-
-        assert(path);
-
-        /* We refuse to clean the root file system with this
-         * call. This is extra paranoia to never cause a really
-         * seriously broken system. */
-        if (path_equal(path, "/")) {
-                log_error("Attempted to remove entire root file system, and we can't allow that.");
-                return -EPERM;
-        }
-
-        fd = open(path, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW|O_NOATIME);
-        if (fd < 0) {
-
-                if (errno != ENOTDIR && errno != ELOOP)
-                        return -errno;
-
-                if (!dangerous) {
-                        if (statfs(path, &s) < 0)
-                                return -errno;
-
-                        if (!is_temporary_fs(&s)) {
-                                log_error("Attempted to remove disk file system, and we can't allow that.");
-                                return -EPERM;
-                        }
-                }
-
-                if (delete_root && !only_dirs)
-                        if (unlink(path) < 0 && errno != ENOENT)
-                                return -errno;
-
-                return 0;
-        }
-
-        if (!dangerous) {
-                if (fstatfs(fd, &s) < 0) {
-                        safe_close(fd);
-                        return -errno;
-                }
-
-                if (!is_temporary_fs(&s)) {
-                        log_error("Attempted to remove disk file system, and we can't allow that.");
-                        safe_close(fd);
-                        return -EPERM;
-                }
-        }
-
-        r = rm_rf_children_dangerous(fd, only_dirs, honour_sticky, NULL);
-        if (delete_root) {
-
-                if (honour_sticky && file_is_priv_sticky(path) > 0)
-                        return r;
-
-                if (rmdir(path) < 0 && errno != ENOENT) {
-                        if (r == 0)
-                                r = -errno;
-                }
-        }
-
-        return r;
-}
-
-int rm_rf(const char *path, bool only_dirs, bool delete_root, bool honour_sticky) {
-        return rm_rf_internal(path, only_dirs, delete_root, honour_sticky, false);
-}
-
-int rm_rf_dangerous(const char *path, bool only_dirs, bool delete_root, bool honour_sticky) {
-        return rm_rf_internal(path, only_dirs, delete_root, honour_sticky, true);
-}
-
 int chmod_and_chown(const char *path, mode_t mode, uid_t uid, gid_t gid) {
         assert(path);
 
@@ -6947,9 +6859,9 @@ int umount_recursive(const char *prefix, int flags) {
                                 continue;
                         }
 
-                        p = cunescape(path);
-                        if (!p)
-                                return -ENOMEM;
+                        r = cunescape(path, UNESCAPE_RELAX, &p);
+                        if (r < 0)
+                                return r;
 
                         if (!path_startswith(p, prefix))
                                 continue;
@@ -7049,9 +6961,9 @@ int bind_remount_recursive(const char *prefix, bool ro) {
                                 continue;
                         }
 
-                        p = cunescape(path);
-                        if (!p)
-                                return -ENOMEM;
+                        r = cunescape(path, UNESCAPE_RELAX, &p);
+                        if (r < 0)
+                                return r;
 
                         /* Let's ignore autofs mounts.  If they aren't
                          * triggered yet, we want to avoid triggering
@@ -7393,20 +7305,26 @@ int unquote_first_word(const char **p, char **ret, UnquoteFlags flags) {
                                 return -EINVAL;
                         }
 
-                        if (!GREEDY_REALLOC(s, allocated, sz+2))
+                        if (!GREEDY_REALLOC(s, allocated, sz+7))
                                 return -ENOMEM;
 
                         if (flags & UNQUOTE_CUNESCAPE) {
-                                r = cunescape_one(*p, (size_t) -1, &c);
+                                uint32_t u;
+
+                                r = cunescape_one(*p, (size_t) -1, &c, &u);
                                 if (r < 0)
                                         return -EINVAL;
 
                                 (*p) += r - 1;
-                        }
 
-                        s[sz++] = c;
-                        state = VALUE;
+                                if (c != 0)
+                                        s[sz++] = c; /* normal explicit char */
+                                else
+                                        sz += utf8_encode_unichar(s + sz, u); /* unicode chars we'll encode as utf8 */
+                        } else
+                                s[sz++] = c;
 
+                        state = VALUE;
                         break;
 
                 case SINGLE_QUOTE:
@@ -7434,18 +7352,25 @@ int unquote_first_word(const char **p, char **ret, UnquoteFlags flags) {
                                 return -EINVAL;
                         }
 
-                        if (!GREEDY_REALLOC(s, allocated, sz+2))
+                        if (!GREEDY_REALLOC(s, allocated, sz+7))
                                 return -ENOMEM;
 
                         if (flags & UNQUOTE_CUNESCAPE) {
-                                r = cunescape_one(*p, (size_t) -1, &c);
+                                uint32_t u;
+
+                                r = cunescape_one(*p, (size_t) -1, &c, &u);
                                 if (r < 0)
                                         return -EINVAL;
 
                                 (*p) += r - 1;
-                        }
 
-                        s[sz++] = c;
+                                if (c != 0)
+                                        s[sz++] = c;
+                                else
+                                        sz += utf8_encode_unichar(s + sz, u);
+                        } else
+                                s[sz++] = c;
+
                         state = SINGLE_QUOTE;
                         break;
 
@@ -7472,18 +7397,25 @@ int unquote_first_word(const char **p, char **ret, UnquoteFlags flags) {
                                 return -EINVAL;
                         }
 
-                        if (!GREEDY_REALLOC(s, allocated, sz+2))
+                        if (!GREEDY_REALLOC(s, allocated, sz+7))
                                 return -ENOMEM;
 
                         if (flags & UNQUOTE_CUNESCAPE) {
-                                r = cunescape_one(*p, (size_t) -1, &c);
+                                uint32_t u;
+
+                                r = cunescape_one(*p, (size_t) -1, &c, &u);
                                 if (r < 0)
                                         return -EINVAL;
 
                                 (*p) += r - 1;
-                        }
 
-                        s[sz++] = c;
+                                if (c != 0)
+                                        s[sz++] = c;
+                                else
+                                        sz += utf8_encode_unichar(s + sz, u);
+                        } else
+                                s[sz++] = c;
+
                         state = DOUBLE_QUOTE;
                         break;
 
@@ -7815,7 +7747,7 @@ int fd_setcrtime(int fd, usec_t usec) {
         return 0;
 }
 
-int chattr_fd(int fd, bool b, unsigned mask) {
+int chattr_fd(int fd, unsigned value, unsigned mask) {
         unsigned old_attr, new_attr;
 
         assert(fd >= 0);
@@ -7826,21 +7758,17 @@ int chattr_fd(int fd, bool b, unsigned mask) {
         if (ioctl(fd, FS_IOC_GETFLAGS, &old_attr) < 0)
                 return -errno;
 
-        if (b)
-                new_attr = old_attr | mask;
-        else
-                new_attr = old_attr & ~mask;
-
+        new_attr = (old_attr & ~mask) | (value & mask);
         if (new_attr == old_attr)
                 return 0;
 
         if (ioctl(fd, FS_IOC_SETFLAGS, &new_attr) < 0)
                 return -errno;
 
-        return 0;
+        return 1;
 }
 
-int chattr_path(const char *p, bool b, unsigned mask) {
+int chattr_path(const char *p, unsigned value, unsigned mask) {
         _cleanup_close_ int fd = -1;
 
         assert(p);
@@ -7852,29 +7780,7 @@ int chattr_path(const char *p, bool b, unsigned mask) {
         if (fd < 0)
                 return -errno;
 
-        return chattr_fd(fd, b, mask);
-}
-
-int change_attr_fd(int fd, unsigned value, unsigned mask) {
-        unsigned old_attr, new_attr;
-
-        assert(fd >= 0);
-
-        if (mask == 0)
-                return 0;
-
-        if (ioctl(fd, FS_IOC_GETFLAGS, &old_attr) < 0)
-                return -errno;
-
-        new_attr = (old_attr & ~mask) |(value & mask);
-
-        if (new_attr == old_attr)
-                return 0;
-
-        if (ioctl(fd, FS_IOC_SETFLAGS, &new_attr) < 0)
-                return -errno;
-
-        return 0;
+        return chattr_fd(fd, value, mask);
 }
 
 int read_attr_fd(int fd, unsigned *ret) {
@@ -8198,3 +8104,43 @@ int rename_noreplace(int olddirfd, const char *oldpath, int newdirfd, const char
 
         return 0;
 }
+
+char *shell_maybe_quote(const char *s) {
+        const char *p;
+        char *r, *t;
+
+        assert(s);
+
+        /* Wraps s in double quotes if it contains a character that requires quoting; otherwise returns a
+         * plain strdup() copy. Returns a newly allocated string, or NULL if the allocation fails. */
+
+        for (p = s; *p; p++) /* Stop at the first character that forces quoting */
+                if (*p <= ' ' || /* Space and control characters; also bytes >= 0x80 where char is signed */
+                    *p >= 127 || /* DEL; also bytes >= 0x80 where char is unsigned */
+                    strchr(SHELL_NEED_QUOTES, *p))
+                        break;
+
+        if (!*p) /* Scanned the whole string without a hit: no quoting needed */
+                return strdup(s);
+
+        r = new(char, 1+strlen(s)*2+1+1); /* Opening quote + every char possibly backslash-escaped + closing quote + NUL */
+        if (!r)
+                return NULL;
+
+        t = r;
+        *(t++) = '"';
+        t = mempcpy(t, s, p - s); /* Prefix before p is copied verbatim; presumably nothing in it needs a backslash -- confirm SHELL_NEED_ESCAPE is covered by SHELL_NEED_QUOTES */
+
+        for (; *p; p++) {
+
+                if (strchr(SHELL_NEED_ESCAPE, *p)) /* Prefix characters listed in SHELL_NEED_ESCAPE with a backslash */
+                        *(t++) = '\\';
+
+                *(t++) = *p;
+        }
+
+        *(t++)= '"';
+        *t = 0;
+
+        return r;
+}