From: Zbigniew Jędrzejewski-Szmek Date: Sat, 9 Jun 2018 11:41:24 +0000 (+0200) Subject: basic/utf8: add ascii_is_valid_n() X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?a=commitdiff_plain;h=e62e8b3d2e9ac7f1e1a13578eb989f751b6c94ae;p=elogind.git basic/utf8: add ascii_is_valid_n() --- diff --git a/src/basic/utf8.c b/src/basic/utf8.c index 1e86949a4..af86f74d1 100644 --- a/src/basic/utf8.c +++ b/src/basic/utf8.c @@ -247,6 +247,9 @@ char *utf8_escape_non_printable(const char *str) { char *ascii_is_valid(const char *str) { const char *p; + /* Check whether the string consists of valid ASCII bytes, + * i.e. values between 0 and 127, inclusive. */ + assert(str); for (p = str; *p; p++) @@ -256,6 +259,21 @@ char *ascii_is_valid(const char *str) { return (char*) str; } +char *ascii_is_valid_n(const char *str, size_t len) { + size_t i; + + /* Very similar to ascii_is_valid(), but checks exactly len + * bytes and rejects any NULs in that range. */ + + assert(str); + + for (i = 0; i < len; i++) + if ((unsigned char) str[i] >= 128 || str[i] == 0) + return NULL; + + return (char*) str; +} + /** * utf8_encode_unichar() - Encode single UCS-4 character as UTF-8 * @out_utf8: output buffer of at least 4 bytes or NULL diff --git a/src/basic/utf8.h b/src/basic/utf8.h index 7d68105a0..d6936ea46 100644 --- a/src/basic/utf8.h +++ b/src/basic/utf8.h @@ -22,6 +22,7 @@ bool unichar_is_valid(char32_t c); const char *utf8_is_valid(const char *s) _pure_; char *ascii_is_valid(const char *s) _pure_; +char *ascii_is_valid_n(const char *str, size_t len); bool utf8_is_printable_newline(const char* str, size_t length, bool newline) _pure_; #define utf8_is_printable(str, length) utf8_is_printable_newline(str, length, true) diff --git a/src/test/test-utf8.c b/src/test/test-utf8.c index ec963437b..d645dc958 100644 --- a/src/test/test-utf8.c +++ b/src/test/test-utf8.c @@ -24,11 +24,21 @@ static void test_utf8_is_valid(void) { } static void test_ascii_is_valid(void) { - 
assert_se(ascii_is_valid("alsdjf\t\vbarr\nba z")); + assert_se( ascii_is_valid("alsdjf\t\vbarr\nba z")); assert_se(!ascii_is_valid("\342\204\242")); assert_se(!ascii_is_valid("\341\204")); } +static void test_ascii_is_valid_n(void) { + assert_se( ascii_is_valid_n("alsdjf\t\vbarr\nba z", 17)); + assert_se( ascii_is_valid_n("alsdjf\t\vbarr\nba z", 16)); + assert_se(!ascii_is_valid_n("alsdjf\t\vbarr\nba z", 18)); + assert_se(!ascii_is_valid_n("\342\204\242", 3)); + assert_se(!ascii_is_valid_n("\342\204\242", 2)); + assert_se(!ascii_is_valid_n("\342\204\242", 1)); + assert_se( ascii_is_valid_n("\342\204\242", 0)); +} + static void test_utf8_encoded_valid_unichar(void) { assert_se(utf8_encoded_valid_unichar("\342\204\242") == 3); assert_se(utf8_encoded_valid_unichar("\302\256") == 2); @@ -115,6 +125,7 @@ int main(int argc, char *argv[]) { test_utf8_is_valid(); test_utf8_is_printable(); test_ascii_is_valid(); + test_ascii_is_valid_n(); test_utf8_encoded_valid_unichar(); test_utf8_escaping(); test_utf8_escaping_printable();