basic/env-util: when serializing, actually use escaping

[elogind.git] / src / basic / escape.c
diff --git a/src/basic/escape.c b/src/basic/escape.c

index 4815161b092afb5d1d04eef1856b8db7341d6ca9..d43bdaebfc9922e0294a00a8b8dc4e2e45b3193c 100644 (file)
--- a/src/basic/escape.c
+++ b/src/basic/escape.c
@@ -1,5 +1,3 @@
-/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
-
  /***
    This file is part of systemd.
  
@@ -19,12 +17,15 @@
    along with systemd; If not, see <http://www.gnu.org/licenses/>.
  ***/
  
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
  #include "alloc-util.h"
  #include "escape.h"
  #include "hexdecoct.h"
-#include "string-util.h"
+#include "macro.h"
  #include "utf8.h"
-#include "util.h"
  
  size_t cescape_char(char c, char *buf) {
          char * buf_old = buf;
@@ -89,20 +90,20 @@ size_t cescape_char(char c, char *buf) {
          return buf - buf_old;
  }
  
-char *cescape(const char *s) {
-        char *r, *t;
+char *cescape_length(const char *s, size_t n) {
          const char *f;
+        char *r, *t;
  
-        assert(s);
+        assert(s || n == 0);
  
          /* Does C style string escaping. May be reversed with
           * cunescape(). */
  
-        r = new(char, strlen(s)*4 + 1);
+        r = new(char, n*4 + 1);
          if (!r)
                  return NULL;
  
-        for (f = s, t = r; *f; f++)
+        for (f = s, t = r; f < s + n; f++)
                  t += cescape_char(*f, t);
  
          *t = 0;
@@ -110,16 +111,24 @@ char *cescape(const char *s) {
          return r;
  }
  
-int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode) {
+char *cescape(const char *s) {
+        assert(s);
+
+        return cescape_length(s, strlen(s));
+}
+
+int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit) {
          int r = 1;
  
          assert(p);
          assert(*p);
          assert(ret);
  
-        /* Unescapes C style. Returns the unescaped character in ret,
-         * unless we encountered a \u sequence in which case the full
-         * unicode character is returned in ret_unicode, instead. */
+        /* Unescapes C style. Returns the unescaped character in ret.
+         * Sets *eight_bit to true if the escaped sequence either fits in
+         * one byte in UTF-8 or is a non-unicode literal byte and should
+         * instead be copied directly.
+         */
  
          if (length != (size_t) -1 && length < 1)
                  return -EINVAL;
@@ -181,7 +190,8 @@ int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode
                  if (a == 0 && b == 0)
                          return -EINVAL;
  
-                *ret = (char) ((a << 4U) | b);
+                *ret = (a << 4U) | b;
+                *eight_bit = true;
                  r = 3;
                  break;
          }
@@ -208,16 +218,7 @@ int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode
                  if (c == 0)
                          return -EINVAL;
  
-                if (c < 128)
-                        *ret = c;
-                else {
-                        if (!ret_unicode)
-                                return -EINVAL;
-
-                        *ret = 0;
-                        *ret_unicode = c;
-                }
-
+                *ret = c;
                  r = 5;
                  break;
          }
@@ -227,7 +228,7 @@ int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode
  
                  int a[8];
                  unsigned i;
-                uint32_t c;
+                char32_t c;
  
                  if (length != (size_t) -1 && length < 9)
                          return -EINVAL;
@@ -249,16 +250,7 @@ int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode
                  if (!unichar_is_valid(c))
                          return -EINVAL;
  
-                if (c < 128)
-                        *ret = c;
-                else {
-                        if (!ret_unicode)
-                                return -EINVAL;
-
-                        *ret = 0;
-                        *ret_unicode = c;
-                }
-
+                *ret = c;
                  r = 9;
                  break;
          }
@@ -273,7 +265,7 @@ int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode
          case '7': {
                  /* octal encoding */
                  int a, b, c;
-                uint32_t m;
+                char32_t m;
  
                  if (length != (size_t) -1 && length < 3)
                          return -EINVAL;
@@ -300,6 +292,7 @@ int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode
                          return -EINVAL;
  
                  *ret = m;
+                *eight_bit = true;
                  r = 3;
                  break;
          }
@@ -332,15 +325,15 @@ int cunescape_length_with_prefix(const char *s, size_t length, const char *prefi
  
          for (f = s, t = r + pl; f < s + length; f++) {
                  size_t remaining;
-                uint32_t u;
-                char c;
+                bool eight_bit = false;
+                char32_t u;
                  int k;
  
                  remaining = s + length - f;
                  assert(remaining > 0);
  
                  if (*f != '\\') {
-                        /* A literal literal, copy verbatim */
+                        /* A literal, copy verbatim */
                          *(t++) = *f;
                          continue;
                  }
@@ -356,7 +349,7 @@ int cunescape_length_with_prefix(const char *s, size_t length, const char *prefi
                          return -EINVAL;
                  }
  
-                k = cunescape_one(f + 1, remaining - 1, &c, &u);
+                k = cunescape_one(f + 1, remaining - 1, &u, &eight_bit);
                  if (k < 0) {
                          if (flags & UNESCAPE_RELAX) {
                                  /* Invalid escape code, let's take it literal then */
@@ -368,14 +361,13 @@ int cunescape_length_with_prefix(const char *s, size_t length, const char *prefi
                          return k;
                  }
  
-                if (c != 0)
-                        /* Non-Unicode? Let's encode this directly */
-                        *(t++) = c;
+                f += k;
+                if (eight_bit)
+                        /* One byte? Set directly as specified */
+                        *(t++) = u;
                  else
-                        /* Unicode? Then let's encode this in UTF-8 */
+                        /* Otherwise encode as multi-byte UTF-8 */
                          t += utf8_encode_unichar(t, u);
-
-                f += k;
          }
  
          *t = 0;
@@ -421,10 +413,45 @@ char *xescape(const char *s, const char *bad) {
          return r;
  }
  
-static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad) {
+#if 0 /// UNNEEDED by elogind
+char *octescape(const char *s, size_t len) {
+        char *r, *t;
+        const char *f;
+
+        /* Escapes all chars in bad, in addition to \ and " chars,
+         * in \nnn style escaping. */
+
+        r = new(char, len * 4 + 1);
+        if (!r)
+                return NULL;
+
+        for (f = s, t = r; f < s + len; f++) {
+
+                if (*f < ' ' || *f >= 127 || *f == '\\' || *f == '"') {
+                        *(t++) = '\\';
+                        *(t++) = '0' + (*f >> 6);
+                        *(t++) = '0' + ((*f >> 3) & 8);
+                        *(t++) = '0' + (*f & 8);
+                } else
+                        *(t++) = *f;
+        }
+
+        *t = 0;
+
+        return r;
+
+}
+
+static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad, bool escape_tab_nl) {
          assert(bad);
  
          for (; *s; s++) {
+                if (escape_tab_nl && IN_SET(*s, '\n', '\t')) {
+                        *(t++) = '\\';
+                        *(t++) = *s == '\n' ? 'n' : 't';
+                        continue;
+                }
+
                  if (*s == '\\' || strchr(bad, *s))
                          *(t++) = '\\';
  
@@ -441,20 +468,21 @@ char *shell_escape(const char *s, const char *bad) {
          if (!r)
                  return NULL;
  
-        t = strcpy_backslash_escaped(r, s, bad);
+        t = strcpy_backslash_escaped(r, s, bad, false);
          *t = 0;
  
          return r;
  }
  
-char *shell_maybe_quote(const char *s) {
+char* shell_maybe_quote(const char *s, EscapeStyle style) {
          const char *p;
          char *r, *t;
  
          assert(s);
  
-        /* Encloses a string in double quotes if necessary to make it
-         * OK as shell string. */
+        /* Encloses a string in quotes if necessary to make it OK as a shell
+         * string. Note that we treat benign UTF-8 characters as needing
+         * escaping too, but that should be OK. */
  
          for (p = s; *p; p++)
                  if (*p <= ' ' ||
@@ -465,18 +493,32 @@ char *shell_maybe_quote(const char *s) {
          if (!*p)
                  return strdup(s);
  
-        r = new(char, 1+strlen(s)*2+1+1);
+        r = new(char, (style == ESCAPE_POSIX) + 1 + strlen(s)*2 + 1 + 1);
          if (!r)
                  return NULL;
  
          t = r;
-        *(t++) = '"';
+        if (style == ESCAPE_BACKSLASH)
+                *(t++) = '"';
+        else if (style == ESCAPE_POSIX) {
+                *(t++) = '$';
+                *(t++) = '\'';
+        } else
+                assert_not_reached("Bad EscapeStyle");
+
          t = mempcpy(t, s, p - s);
  
-        t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE);
+        if (style == ESCAPE_BACKSLASH)
+                t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE, false);
+        else
+                t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE_POSIX, true);
  
-        *(t++)= '"';
+        if (style == ESCAPE_BACKSLASH)
+                *(t++) = '"';
+        else
+                *(t++) = '\'';
          *t = 0;
  
          return r;
  }
+#endif // 0