From 550a40eceb7d1917152fc9317bf2696708d52bc2 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Zbigniew=20J=C4=99drzejewski-Szmek?=
Date: Fri, 17 Jan 2014 21:28:41 -0500
Subject: [PATCH] core: do not print invalid utf-8 in error messages

---
 TODO                 |  3 ---
 src/shared/fileio.c  | 11 +++++++----
 src/shared/utf8.c    | 26 ++++++++++++++++++++++++++
 src/shared/utf8.h    |  3 +++
 src/test/test-utf8.c | 17 +++++++++++++++++
 5 files changed, 53 insertions(+), 7 deletions(-)

diff --git a/TODO b/TODO
index 57e82ff26..1a1e88931 100644
--- a/TODO
+++ b/TODO
@@ -16,9 +16,6 @@ Bugfixes:
 
 * properly handle .mount unit state tracking when two mount points are stacked one on top of another on the exact same mount point.
 
-* When we detect invalid UTF-8, we cannot use it in an error message:
-  log...("Path is not UTF-8 clean, ignoring assignment: %s", rvalue);
-
 * shorten the message to sane length:
 
 Cannot add dependency job for unit display-manager.service, ignoring: Unit display-manager.service failed to load: No such file or directory. See system logs and 'systemctl status display-manager.service' for details.
diff --git a/src/shared/fileio.c b/src/shared/fileio.c
index 838d12843..b81eeb272 100644
--- a/src/shared/fileio.c
+++ b/src/shared/fileio.c
@@ -539,15 +539,18 @@ static int parse_env_file_push(const char *filename, unsigned line,
         va_list aq, *ap = userdata;
 
         if (!utf8_is_valid(key)) {
-                log_error("%s:%u: invalid UTF-8 for key '%s', ignoring.",
-                          filename, line, key);
+                _cleanup_free_ char *p = utf8_escape_invalid(key);
+
+                log_error("%s:%u: invalid UTF-8 in key '%s', ignoring.",
+                          filename, line, p);
                 return -EINVAL;
         }
 
         if (value && !utf8_is_valid(value)) {
-                /* FIXME: filter UTF-8 */
+                _cleanup_free_ char *p = utf8_escape_invalid(value);
+
                 log_error("%s:%u: invalid UTF-8 value for key %s: '%s', ignoring.",
-                          filename, line, key, value);
+                          filename, line, key, p);
                 return -EINVAL;
         }
 
diff --git a/src/shared/utf8.c b/src/shared/utf8.c
index 6e5ba9abf..0b524d8a9 100644
--- a/src/shared/utf8.c
+++ b/src/shared/utf8.c
@@ -174,6 +174,32 @@ const char *utf8_is_valid(const char *str) {
         return str;
 }
 
+char *utf8_escape_invalid(const char *str) {
+        char *p, *s;
+
+        assert(str);
+
+        p = s = malloc(strlen(str) * 4 + 1);
+        if (!p)
+                return NULL;
+
+        while (*str) {
+                int len;
+
+                len = utf8_encoded_valid_unichar(str);
+                if (len > 0) {
+                        s = mempcpy(s, str, len);
+                        str += len;
+                } else {
+                        s = mempcpy(s, UTF8_REPLACEMENT_CHARACTER, strlen(UTF8_REPLACEMENT_CHARACTER));
+                        str += 1;
+                }
+        }
+        *s = '\0';
+
+        return p;
+}
+
 char *ascii_is_valid(const char *str) {
         const char *p;
 
diff --git a/src/shared/utf8.h b/src/shared/utf8.h
index f56077438..c0eb73a21 100644
--- a/src/shared/utf8.h
+++ b/src/shared/utf8.h
@@ -25,8 +25,11 @@
 
 #include "macro.h"
 
+#define UTF8_REPLACEMENT_CHARACTER "\xef\xbf\xbd"
+
 const char *utf8_is_valid(const char *s) _pure_;
 char *ascii_is_valid(const char *s) _pure_;
+char *utf8_escape_invalid(const char *s);
 
 bool utf8_is_printable(const char* str, size_t length) _pure_;
 
diff --git a/src/test/test-utf8.c b/src/test/test-utf8.c
index d2198fdc7..b7d988f22 100644
--- a/src/test/test-utf8.c
+++ b/src/test/test-utf8.c
@@ -50,11 +50,28 @@ static void test_utf8_encoded_valid_unichar(void) {
 
 }
 
+static void test_utf8_escaping(void) {
+        _cleanup_free_ char *p1, *p2, *p3;
+
+        p1 = utf8_escape_invalid("goo goo goo");
+        puts(p1);
+        assert_se(utf8_is_valid(p1));
+
+        p2 = utf8_escape_invalid("\341\204\341\204");
+        puts(p2);
+        assert_se(utf8_is_valid(p2));
+
+        p3 = utf8_escape_invalid("\341\204");
+        puts(p3);
+        assert_se(utf8_is_valid(p3));
+}
+
 int main(int argc, char *argv[]) {
         test_utf8_is_valid();
         test_utf8_is_printable();
         test_ascii_is_valid();
         test_utf8_encoded_valid_unichar();
+        test_utf8_escaping();
 
         return 0;
 }
-- 
2.30.2