+/** @brief Word_Break property tailor that overrides LOW LINE
+ *
+ * @param c Code point being classified
+ * @return unicode_Word_Break_Other when @p c is U+005F, otherwise -1
+ *
+ * Used so that underscores are treated like spaces when splitting words.
+ * NOTE(review): -1 presumably tells the caller to fall back to the
+ * untailored property value; confirm against the tailor contract.
+ */
+static int tailor_underscore_Word_Break_Other(uint32_t c) {
+  /* U+005F LOW LINE (SPACING UNDERSCORE) is the only tailored code point */
+  if(c == 0x005F)
+    return unicode_Word_Break_Other;
+  return -1;
+}
+
+/** @brief Normalize a string and split it into words under a tailoring
+ *
+ * @param v Vector that receives one UTF-8 string per word
+ * @param s NUL-terminated input string (its length is taken with strlen())
+ * @param pt Word_Break tailoring handed to utf32_word_split()
+ *
+ * Nothing is appended to @p v if the conversion to UTF-32 or the
+ * case-folding step yields a null pointer.
+ */
+static void word_split(struct vector *v,
+                       const char *s,
+                       unicode_property_tailor *pt) {
+  size_t nchars, nwords, n;
+  uint32_t *text, **words;
+
+  /* Decode the UTF-8 input into UTF-32 */
+  text = utf8_to_utf32(s, strlen(s), &nchars);
+  if(!text)
+    return;
+  /* Case-fold so that case distinctions are erased */
+  text = utf32_casefold_compat(text, nchars, &nchars);
+  if(!text)
+    return;
+  /* Break the folded text into words according to the tailoring */
+  words = utf32_word_split(text, nchars, &nwords, pt);
+  /* Re-encode each word as UTF-8 and add it to the result vector */
+  for(n = 0; n < nwords; ++n) {
+    uint32_t *word = words[n];
+
+    vector_append(v, utf32_to_utf8(word, utf32_len(word), 0));
+  }
+}
+