conf: enforce UTF8 validty everywhere

author Lennart Poettering <lennart@poettering.net>

Mon, 12 Mar 2012 21:22:16 +0000 (22:22 +0100)

committer Lennart Poettering <lennart@poettering.net>

Mon, 12 Mar 2012 21:22:21 +0000 (22:22 +0100)
author Lennart Poettering <lennart@poettering.net>
Mon, 12 Mar 2012 21:22:16 +0000 (22:22 +0100)
committer Lennart Poettering <lennart@poettering.net>
Mon, 12 Mar 2012 21:22:21 +0000 (22:22 +0100)
diff --git a/Makefile.am b/Makefile.am

index fc8b20c2b46b9f913ff076282d6873f67505c0c0..d2bd3404ce4c29f7e448264674945f3ac90e7bf1 100644 (file)
--- a/Makefile.am
+++ b/Makefile.am
@@ -468,7 +468,8 @@ libsystemd_basic_la_SOURCES = \
         src/socket-util.c \
         src/log.c \
         src/ratelimit.c \
-       src/exit-status.c
+       src/exit-status.c \
+        src/utf8.c
  
  libsystemd_basic_la_CFLAGS = \
         $(AM_CFLAGS) \
@@ -649,7 +650,8 @@ EXTRA_DIST += \
         src/dbus-loop.h \
         src/spawn-agent.h \
         src/acl-util.h \
-       src/logs-show.h
+       src/logs-show.h \
+        src/utf8.h
  
  MANPAGES = \
         man/systemd.1 \
diff --git a/TODO b/TODO

index 315afe6615c0c9dc2ee13684c6e406eddcc52b42..81aaabcc238b84a1b02cc7d1e8ecb098f771cd7e 100644 (file)
--- a/TODO
+++ b/TODO
@@ -18,6 +18,10 @@ Bugfixes:
  
  Features:
  
+* document crypttab(5)
+
+* There's currently no way to cancel fsck (used to be possible via C-c or c on the console)
+
  * hook up /dev/watchdog with main event loop for embedded, server uses
  
  * man: for some reason the HTML versions of the man pages are currently not being packaged
@@ -115,8 +119,6 @@ Features:
  
  * document that %% can be used to write % in a string that is specifier extended
  
-* check utf8 everywhere
-
  * when an instanced service exits, remove its parent cgroup too if possible.
  
  * Make libselinux, libattr, libcap, libdl dependencies only of the tools which actually need them.
diff --git a/src/conf-parser.c b/src/conf-parser.c

index c7dd01aa1d5347c3e0355cc1c8548e5ae94208cd..135b175c09a5dd4a5d939a34c41bf01b1121f39a 100644 (file)
--- a/src/conf-parser.c
+++ b/src/conf-parser.c
@@ -30,6 +30,7 @@
  #include "macro.h"
  #include "strv.h"
  #include "log.h"
+#include "utf8.h"
  
  int config_item_table_lookup(
                  void *table,
@@ -584,14 +585,23 @@ int config_parse_string(
          assert(rvalue);
          assert(data);
  
-        if (*rvalue) {
-                if (!(n = strdup(rvalue)))
-                        return -ENOMEM;
-        } else
-                n = NULL;
+        n = cunescape(rvalue);
+        if (!n)
+                return -ENOMEM;
+
+        if (!utf8_is_valid(n)) {
+                log_error("[%s:%u] String is not UTF-8 clean, ignoring assignment: %s", filename, line, rvalue);
+                free(n);
+                return 0;
+        }
  
          free(*s);
-        *s = n;
+        if (*n)
+                *s = n;
+        else {
+                free(n);
+                *s = NULL;
+        }
  
          return 0;
  }
@@ -614,12 +624,18 @@ int config_parse_path(
          assert(rvalue);
          assert(data);
  
+        if (!utf8_is_valid(rvalue)) {
+                log_error("[%s:%u] Path is not UTF-8 clean, ignoring assignment: %s", filename, line, rvalue);
+                return 0;
+        }
+
          if (!path_is_absolute(rvalue)) {
                  log_error("[%s:%u] Not an absolute path, ignoring: %s", filename, line, rvalue);
                  return 0;
          }
  
-        if (!(n = strdup(rvalue)))
+        n = strdup(rvalue);
+        if (!n)
                  return -ENOMEM;
  
          path_kill_slashes(n);
@@ -646,6 +662,7 @@ int config_parse_strv(
          unsigned k;
          size_t l;
          char *state;
+        int r;
  
          assert(filename);
          assert(lvalue);
@@ -656,7 +673,8 @@ int config_parse_strv(
          FOREACH_WORD_QUOTED(w, l, rvalue, state)
                  k++;
  
-        if (!(n = new(char*, k+1)))
+        n = new(char*, k+1);
+        if (!n)
                  return -ENOMEM;
  
          if (*sv)
@@ -665,9 +683,21 @@ int config_parse_strv(
          else
                  k = 0;
  
-        FOREACH_WORD_QUOTED(w, l, rvalue, state)
-                if (!(n[k++] = cunescape_length(w, l)))
+        FOREACH_WORD_QUOTED(w, l, rvalue, state) {
+                n[k] = cunescape_length(w, l);
+                if (!n[k]) {
+                        r = -ENOMEM;
                          goto fail;
+                }
+
+                if (!utf8_is_valid(n[k])) {
+                        log_error("[%s:%u] String is not UTF-8 clean, ignoring assignment: %s", filename, line, rvalue);
+                        free(n[k]);
+                        continue;
+                }
+
+                k++;
+        }
  
          n[k] = NULL;
          free(*sv);
@@ -680,7 +710,7 @@ fail:
                  free(n[k-1]);
          free(n);
  
-        return -ENOMEM;
+        return r;
  }
  
  int config_parse_path_strv(
@@ -710,7 +740,8 @@ int config_parse_path_strv(
          FOREACH_WORD_QUOTED(w, l, rvalue, state)
                  k++;
  
-        if (!(n = new(char*, k+1)))
+        n = new(char*, k+1);
+        if (!n)
                  return -ENOMEM;
  
          k = 0;
@@ -719,11 +750,18 @@ int config_parse_path_strv(
                          n[k] = (*sv)[k];
  
          FOREACH_WORD_QUOTED(w, l, rvalue, state) {
-                if (!(n[k] = cunescape_length(w, l))) {
+                n[k] = strndup(w, l);
+                if (!n[k]) {
                          r = -ENOMEM;
                          goto fail;
                  }
  
+                if (!utf8_is_valid(n[k])) {
+                        log_error("[%s:%u] Path is not UTF-8 clean, ignoring assignment: %s", filename, line, rvalue);
+                        free(n[k]);
+                        continue;
+                }
+
                  if (!path_is_absolute(n[k])) {
                          log_error("[%s:%u] Not an absolute path, ignoring: %s", filename, line, rvalue);
                          free(n[k]);
@@ -731,7 +769,6 @@ int config_parse_path_strv(
                  }
  
                  path_kill_slashes(n[k]);
-
                  k++;
          }
  
@@ -742,7 +779,6 @@ int config_parse_path_strv(
          return 0;
  
  fail:
-        free(n[k]);
          for (; k > 0; k--)
                  free(n[k-1]);
          free(n);
diff --git a/src/load-fragment.c b/src/load-fragment.c

index b963d7f9958a5ac26103f355a6594b5cd198be05..637c82b427d461fd0417831edb7aa3e4b16f0b37 100644 (file)
--- a/src/load-fragment.c
+++ b/src/load-fragment.c
@@ -43,6 +43,7 @@
  #include "missing.h"
  #include "unit-name.h"
  #include "bus-errors.h"
+#include "utf8.h"
  
  #ifndef HAVE_SYSV_COMPAT
  int config_parse_warn_compat(
@@ -90,7 +91,6 @@ int config_parse_unit_deps(
  
                  k = unit_name_printf(u, t);
                  free(t);
-
                  if (!k)
                          return -ENOMEM;
  
@@ -128,22 +128,18 @@ int config_parse_unit_names(
                  char *t, *k;
                  int r;
  
-                if (!(t = strndup(w, l)))
+                t = strndup(w, l);
+                if (!t)
                          return -ENOMEM;
  
                  k = unit_name_printf(u, t);
                  free(t);
-
                  if (!k)
                          return -ENOMEM;
  
                  r = unit_merge_by_name(u, k);
-
-                if (r < 0) {
+                if (r < 0)
                          log_error("Failed to add name %s, ignoring: %s", k, strerror(-r));
-                        free(k);
-                        return 0;
-                }
  
                  free(k);
          }
@@ -162,27 +158,22 @@ int config_parse_unit_string_printf(
                  void *userdata) {
  
          Unit *u = userdata;
-        char **s = data;
          char *k;
+        int r;
  
          assert(filename);
          assert(lvalue);
          assert(rvalue);
-        assert(s);
          assert(u);
  
-        if (!(k = unit_full_printf(u, rvalue)))
+        k = unit_full_printf(u, rvalue);
+        if (!k)
                  return -ENOMEM;
  
-        free(*s);
-        if (*k)
-                *s = k;
-        else {
-                free(k);
-                *s = NULL;
-        }
+        r = config_parse_string(filename, line, section, lvalue, ltype, k, data, userdata);
+        free (k);
  
-        return 0;
+        return r;
  }
  
  int config_parse_unit_strv_printf(
@@ -225,30 +216,22 @@ int config_parse_unit_path_printf(
                  void *userdata) {
  
          Unit *u = userdata;
-        char **s = data;
          char *k;
+        int r;
  
          assert(filename);
          assert(lvalue);
          assert(rvalue);
-        assert(s);
          assert(u);
  
-        if (!(k = unit_full_printf(u, rvalue)))
+        k = unit_full_printf(u, rvalue);
+        if (!k)
                  return -ENOMEM;
  
-        if (!path_is_absolute(k)) {
-                log_error("[%s:%u] Not an absolute path: %s", filename, line, k);
-                free(k);
-                return -EINVAL;
-        }
-
-        path_kill_slashes(k);
-
-        free(*s);
-        *s = k;
+        r = config_parse_path(filename, line, section, lvalue, ltype, k, data, userdata);
+        free(k);
  
-        return 0;
+        return r;
  }
  
  int config_parse_socket_listen(
@@ -271,7 +254,8 @@ int config_parse_socket_listen(
  
          s = SOCKET(data);
  
-        if (!(p = new0(SocketPort, 1)))
+        p = new0(SocketPort, 1);
+        if (!p)
                  return -ENOMEM;
  
          if (streq(lvalue, "ListenFIFO")) {
@@ -478,6 +462,7 @@ int config_parse_exec(
          ExecCommand **e = data, *nce;
          char *path, **n;
          unsigned k;
+        int r;
  
          assert(filename);
          assert(lvalue);
@@ -528,7 +513,8 @@ int config_parse_exec(
                          k++;
                  }
  
-                if (!(n = new(char*, k + !honour_argv0)))
+                n = new(char*, k + !honour_argv0);
+                if (!n)
                          return -ENOMEM;
  
                  k = 0;
@@ -538,11 +524,33 @@ int config_parse_exec(
  
                          if (honour_argv0 && w == rvalue) {
                                  assert(!path);
-                                if (!(path = cunescape_length(w, l)))
+
+                                path = strndup(w, l);
+                                if (!path) {
+                                        r = -ENOMEM;
                                          goto fail;
+                                }
+
+                                if (!utf8_is_valid(path)) {
+                                        log_error("[%s:%u] Path is not UTF-8 clean, ignoring assignment: %s", filename, line, rvalue);
+                                        r = 0;
+                                        goto fail;
+                                }
+
                          } else {
-                                if (!(n[k++] = cunescape_length(w, l)))
+                                char *c;
+
+                                c = n[k++] = cunescape_length(w, l);
+                                if (!c) {
+                                        r = -ENOMEM;
                                          goto fail;
+                                }
+
+                                if (!utf8_is_valid(c)) {
+                                        log_error("[%s:%u] Path is not UTF-8 clean, ignoring assignment: %s", filename, line, rvalue);
+                                        r = 0;
+                                        goto fail;
+                                }
                          }
                  }
  
@@ -550,19 +558,25 @@ int config_parse_exec(
  
                  if (!n[0]) {
                          log_error("[%s:%u] Invalid command line, ignoring: %s", filename, line, rvalue);
-                        strv_free(n);
-                        free(path);
-                        return 0;
+                        r = 0;
+                        goto fail;
                  }
  
-                if (!path)
-                        if (!(path = strdup(n[0])))
+                if (!path) {
+                        path = strdup(n[0]);
+                        if (!path) {
+                                r = -ENOMEM;
                                  goto fail;
+                        }
+                }
  
                  assert(path_is_absolute(path));
  
-                if (!(nce = new0(ExecCommand, 1)))
+                nce = new0(ExecCommand, 1);
+                if (!nce) {
+                        r = -ENOMEM;
                          goto fail;
+                }
  
                  nce->argv = n;
                  nce->path = path;
@@ -583,7 +597,7 @@ fail:
          free(path);
          free(nce);
  
-        return -ENOMEM;
+        return r;
  }
  
  DEFINE_CONFIG_PARSE_ENUM(config_parse_service_type, service_type, ServiceType, "Failed to parse service type");
diff --git a/src/service.c b/src/service.c

index babd9cf9a550b4892413855ef22d75d20876218c..8b5c0b07c4d29efe70b639936389d7b77a2dbdbb 100644 (file)
--- a/src/service.c
+++ b/src/service.c
@@ -39,6 +39,7 @@
  #include "exit-status.h"
  #include "def.h"
  #include "util.h"
+#include "utf8.h"
  
  #ifdef HAVE_SYSV_COMPAT
  
@@ -3200,11 +3201,19 @@ static void service_notify_message(Unit *u, pid_t pid, char **tags) {
          }
  
          /* Interpret STATUS= */
-        if ((e = strv_find_prefix(tags, "STATUS="))) {
+        e = strv_find_prefix(tags, "STATUS=");
+        if (e) {
                  char *t;
  
                  if (e[7]) {
-                        if (!(t = strdup(e+7))) {
+
+                        if (!utf8_is_valid(e+7)) {
+                                log_warning("Status message in notification is not UTF-8 clean.");
+                                return;
+                        }
+
+                        t = strdup(e+7);
+                        if (!t) {
                                  log_error("Failed to allocate string.");
                                  return;
                          }
diff --git a/src/utf8.c b/src/utf8.c

new file mode 100644 (file)

index 0000000..11619dc
--- /dev/null
+++ b/src/utf8.c
@@ -0,0 +1,214 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2012 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+/* This file is based on the GLIB utf8 validation functions. The
+ * original license text follows. */
+
+/* gutf8.c - Operations on UTF-8 strings.
+ *
+ * Copyright (C) 1999 Tom Tromey
+ * Copyright (C) 2000 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.         See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <string.h>
+#include <stdbool.h>
+
+#include "utf8.h"
+
+#define FILTER_CHAR '_'
+
+static inline bool is_unicode_valid(uint32_t ch) {
+
+        if (ch >= 0x110000) /* End of unicode space */
+                return false;
+        if ((ch & 0xFFFFF800) == 0xD800) /* Reserved area for UTF-16 */
+                return false;
+        if ((ch >= 0xFDD0) && (ch <= 0xFDEF)) /* Reserved */
+                return false;
+        if ((ch & 0xFFFE) == 0xFFFE) /* BOM (Byte Order Mark) */
+                return false;
+
+        return true;
+}
+
+static inline bool is_continuation_char(uint8_t ch) {
+        if ((ch & 0xc0) != 0x80) /* 10xxxxxx */
+                return false;
+        return true;
+}
+
+static inline void merge_continuation_char(uint32_t *u_ch, uint8_t ch) {
+        *u_ch <<= 6;
+        *u_ch |= ch & 0x3f;
+}
+
+static char* utf8_validate(const char *str, char *output) {
+        uint32_t val = 0;
+        uint32_t min = 0;
+        const uint8_t *p, *last;
+        int size;
+        uint8_t *o;
+
+        assert(str);
+
+        o = (uint8_t*) output;
+        for (p = (const uint8_t*) str; *p; p++) {
+                if (*p < 128) {
+                        if (o)
+                                *o = *p;
+                } else {
+                        last = p;
+
+                        if ((*p & 0xe0) == 0xc0) { /* 110xxxxx two-char seq. */
+                                size = 2;
+                                min = 128;
+                                val = (uint32_t) (*p & 0x1e);
+                                goto ONE_REMAINING;
+                        } else if ((*p & 0xf0) == 0xe0) { /* 1110xxxx three-char seq.*/
+                                size = 3;
+                                min = (1 << 11);
+                                val = (uint32_t) (*p & 0x0f);
+                                goto TWO_REMAINING;
+                        } else if ((*p & 0xf8) == 0xf0) { /* 11110xxx four-char seq */
+                                size = 4;
+                                min = (1 << 16);
+                                val = (uint32_t) (*p & 0x07);
+                        } else
+                                goto error;
+
+                        p++;
+                        if (!is_continuation_char(*p))
+                                goto error;
+                        merge_continuation_char(&val, *p);
+
+                TWO_REMAINING:
+                        p++;
+                        if (!is_continuation_char(*p))
+                                goto error;
+                        merge_continuation_char(&val, *p);
+
+                ONE_REMAINING:
+                        p++;
+                        if (!is_continuation_char(*p))
+                                goto error;
+                        merge_continuation_char(&val, *p);
+
+                        if (val < min)
+                                goto error;
+
+                        if (!is_unicode_valid(val))
+                                goto error;
+
+                        if (o) {
+                                memcpy(o, last, (size_t) size);
+                                o += size;
+                        }
+
+                        continue;
+
+                error:
+                        if (o) {
+                                *o = FILTER_CHAR;
+                                p = last; /* We retry at the next character */
+                        } else
+                                goto failure;
+                }
+
+                if (o)
+                        o++;
+        }
+
+        if (o) {
+                *o = '\0';
+                return output;
+        }
+
+        return (char*) str;
+
+failure:
+        return NULL;
+}
+
+char* utf8_is_valid (const char *str) {
+        return utf8_validate(str, NULL);
+}
+
+char* utf8_filter (const char *str) {
+        char *new_str;
+
+        assert(str);
+
+        new_str = malloc(strlen(str) + 1);
+        if (!new_str)
+                return NULL;
+
+        return utf8_validate(str, new_str);
+}
+
+char *ascii_is_valid(const char *str) {
+        const char *p;
+
+        assert(str);
+
+        for (p = str; *p; p++)
+                if ((unsigned char) *p >= 128)
+                        return NULL;
+
+        return (char*) str;
+}
+
+char *ascii_filter(const char *str) {
+        char *r, *s, *d;
+        size_t l;
+
+        assert(str);
+
+        l = strlen(str);
+        r = malloc(l + 1);
+        if (!r)
+                return NULL;
+
+        for (s = r, d = r; *s; s++)
+                if ((unsigned char) *s < 128)
+                        *(d++) = *s;
+
+        *d = 0;
+
+        return r;
+}
diff --git a/src/utf8.h b/src/utf8.h

new file mode 100644 (file)

index 0000000..9a72bec
--- /dev/null
+++ b/src/utf8.h
@@ -0,0 +1,33 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#ifndef fooutf8hfoo
+#define fooutf8hfoo
+
+/***
+  This file is part of systemd.
+
+  Copyright 2012 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "macro.h"
+
+char *utf8_is_valid(const char *s) _pure_;
+char *ascii_is_valid(const char *s) _pure_;
+
+char *utf8_filter(const char *s);
+char *ascii_filter(const char *s);
+
+#endif
diff --git a/src/util.c b/src/util.c

index bf22f575c7702201c6d7e452883dd1a84a7bcecc..3a855c1be48e5f97e9f8bc6d476cc09a7b5d5aea 100644 (file)
--- a/src/util.c
+++ b/src/util.c
@@ -1833,7 +1833,8 @@ char *cunescape_length(const char *s, size_t length) {
  
          /* Undoes C style string escaping */
  
-        if (!(r = new(char, length+1)))
+        r = new(char, length+1);
+        if (!r)
                  return r;
  
          for (f = s, t = r; f < s + length; f++) {
@@ -1887,8 +1888,10 @@ char *cunescape_length(const char *s, size_t length) {
                          /* hexadecimal encoding */
                          int a, b;
  
-                        if ((a = unhexchar(f[1])) < 0 ||
-                            (b = unhexchar(f[2])) < 0) {
+                        a = unhexchar(f[1]);
+                        b = unhexchar(f[2]);
+
+                        if (a < 0 || b < 0) {
                                  /* Invalid escape code, let's take it literal then */
                                  *(t++) = '\\';
                                  *(t++) = 'x';
@@ -1911,9 +1914,11 @@ char *cunescape_length(const char *s, size_t length) {
                          /* octal encoding */
                          int a, b, c;
  
-                        if ((a = unoctchar(f[0])) < 0 ||
-                            (b = unoctchar(f[1])) < 0 ||
-                            (c = unoctchar(f[2])) < 0) {
+                        a = unoctchar(f[0]);
+                        b = unoctchar(f[1]);
+                        c = unoctchar(f[2]);
+
+                        if (a < 0 || b < 0 || c < 0) {
                                  /* Invalid escape code, let's take it literal then */
                                  *(t++) = '\\';
                                  *(t++) = f[0];
author	Lennart Poettering <lennart@poettering.net>
	Mon, 12 Mar 2012 21:22:16 +0000 (22:22 +0100)
committer	Lennart Poettering <lennart@poettering.net>
	Mon, 12 Mar 2012 21:22:21 +0000 (22:22 +0100)
Makefile.am		patch \| blob \| history
TODO		patch \| blob \| history
src/conf-parser.c		patch \| blob \| history
src/load-fragment.c		patch \| blob \| history
src/service.c		patch \| blob \| history
src/utf8.c	[new file with mode: 0644]	patch \| blob
src/utf8.h	[new file with mode: 0644]	patch \| blob
src/util.c		patch \| blob \| history