-/** @brief Decode a UTF-8 string into UCS-4
- * @param mb Pointer to 0-terminated UTF-8 string
- * @return Pointer to 0-terminated UCS-4 string, or 0 if the input was rejected
- *
- * The conversion is done by hand instead of via iconv: UCS-4 is not
- * universally supported by iconv implementations, we would otherwise need to
- * know our own endianness, and the transformation is simple enough to do
- * directly.  The inverse operation is @ref ucs42utf8().
- */
-uint32_t *utf82ucs4(const char *mb) {
-  struct dynstr_ucs4 out;
-  uint32_t ch;
-
-  dynstr_ucs4_init(&out);
-  for(; *mb; ) {
-    /* PARSE_UTF8 is defined elsewhere; presumably it decodes one character
-     * from mb into ch and advances mb past it (TODO confirm against the
-     * macro).  The third argument is the action taken on malformed input:
-     * report the problem and give up. */
-    PARSE_UTF8(mb, ch,
-               error(0, "invalid UTF-8 sequence"); return 0;);
-    dynstr_ucs4_append(&out, ch);
-  }
-  dynstr_ucs4_terminate(&out);
-  return out.vec;
-}
-
-/** @brief Convert UCS-4 to UTF-8
- * @param u Pointer to 0-terminated UCS-4 string
- * @return Pointer to 0-terminated UTF-8 string, or 0 on invalid input
- *
- * Each input value is encoded as 1 to 4 bytes depending on its magnitude.
- * Values that are not Unicode scalar values are rejected: anything at or
- * above 0x110000, and the surrogate range 0xD800..0xDFFF (which has no
- * well-formed UTF-8 encoding).  On rejection an error is reported and 0 is
- * returned.
- *
- * See @ref utf82ucs4().
- */
-char *ucs42utf8(const uint32_t *u) {
-  struct dynstr d;
-  uint32_t c;
-
-  dynstr_init(&d);
-  while((c = *u++)) {
-    /* Reject non-scalar values up front so no partial sequence is emitted
-     * for them: out-of-range values and UTF-16 surrogates. */
-    if(c >= 0x110000 || (c >= 0xD800 && c <= 0xDFFF)) {
-      error(0, "invalid UCS-4 character");
-      return 0;
-    }
-    if(c < 0x80) {
-      /* 7 bits: single byte */
-      dynstr_append(&d, c);
-    } else if(c < 0x800) {
-      /* 11 bits: 110xxxxx 10xxxxxx */
-      dynstr_append(&d, 0xC0 | (c >> 6));
-      dynstr_append(&d, 0x80 | (c & 0x3F));
-    } else if(c < 0x10000) {
-      /* 16 bits: 1110xxxx 10xxxxxx 10xxxxxx */
-      dynstr_append(&d, 0xE0 | (c >> 12));
-      dynstr_append(&d, 0x80 | ((c >> 6) & 0x3F));
-      dynstr_append(&d, 0x80 | (c & 0x3F));
-    } else {
-      /* 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
-      dynstr_append(&d, 0xF0 | (c >> 18));
-      dynstr_append(&d, 0x80 | ((c >> 12) & 0x3F));
-      dynstr_append(&d, 0x80 | ((c >> 6) & 0x3F));
-      dynstr_append(&d, 0x80 | (c & 0x3F));
-    }
-  }
-  dynstr_terminate(&d);
-  return d.vec;
-}
-