chiark / gitweb /
utf8: add utf8_n_codepoints() for counting complete utf8 codepoints in a string
author Lennart Poettering <lennart@poettering.net>
Wed, 14 Feb 2018 17:41:03 +0000 (18:41 +0100)
committer Sven Eden <yamakuzure@gmx.net>
Wed, 30 May 2018 05:58:56 +0000 (07:58 +0200)
src/basic/utf8.c
src/basic/utf8.h

index 4da9a405cb85914c13457f5a6a4c0b6358499353..b17f420264089df0291908d92e8d99b979efdc45 100644 (file)
@@ -408,3 +408,22 @@ int utf8_encoded_valid_unichar(const char *str) {
 
         return len;
 }
+
+size_t utf8_n_codepoints(const char *str) {
+        size_t n = 0;
+
+        /* Returns the number of UTF-8 codepoints in the NUL-terminated string str, or (size_t) -1 if utf8_encoded_valid_unichar() rejects any sequence in it, i.e. the string is not valid UTF-8. */
+
+        while (*str != 0) {
+                int k;
+
+                k = utf8_encoded_valid_unichar(str); /* negative on error; otherwise used below as the number of bytes to skip */
+                if (k < 0)
+                        return (size_t) -1; /* error sentinel (the largest size_t value), not a real count */
+
+                str += k; /* step over the sequence that was just validated */
+                n++;
+        }
+
+        return n;
+}
index b0a7485aedc5f61026a12bc75d954c0cdabc85e9..7128615181020d1d77d12bb2a88f1eaa5547df01 100644 (file)
@@ -59,3 +59,5 @@ static inline bool utf16_is_trailing_surrogate(char16_t c) {
 static inline char32_t utf16_surrogate_pair_to_unichar(char16_t lead, char16_t trail) {
                 return ((lead - 0xd800) << 10) + (trail - 0xdc00) + 0x10000;
 }
+
+size_t utf8_n_codepoints(const char *str); /* number of codepoints in str, or (size_t) -1 if str is not valid UTF-8 */