From: Richard Kettlewell Date: Sat, 17 Nov 2007 22:01:22 +0000 (+0000) Subject: untested grapheme cluster boundary detection X-Git-Tag: debian-1_5_99dev9~1^2~50 X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~mdw/git/disorder/commitdiff_plain/35b651f0cc0999deae42c92f2cbca3ecf88fe157 untested grapheme cluster boundary detection --- diff --git a/.bzrignore b/.bzrignore index 2fbeedd..8bd29c1 100644 --- a/.bzrignore +++ b/.bzrignore @@ -120,3 +120,4 @@ disobedience/disobedience.html lib/NormalizationTest.txt lib/CaseFolding.txt lib/UnicodeData.txt +lib/GraphemeBreakProperty.txt diff --git a/lib/unicode.c b/lib/unicode.c index 749916a..bac9e83 100644 --- a/lib/unicode.c +++ b/lib/unicode.c @@ -21,7 +21,8 @@ * @brief Unicode support functions * * Here by UTF-8 and UTF-8 we mean the encoding forms of those names (not the - * encoding schemes). + * encoding schemes). The primary encoding form is UTF-32 but convenience + * wrappers using UTF-8 are provided for a number of functions. * * The idea is that all the strings that hit the database will be in a * particular normalization form, and for the search and tags database @@ -572,6 +573,125 @@ int utf32_cmp(const uint32_t *a, const uint32_t *b) { return *a < *b ? -1 : (*a > *b ? 
1 : 0); } +/** @brief Return the General_Category value for @p c + * @param c Code point + * @return General_Category property value (unicode_gc_Cn if @p c >= UNICODE_NCHARS) + */ +static inline enum unicode_gc_cat utf32__general_category(uint32_t c) { + if(c < UNICODE_NCHARS) { + const struct unidata *const ud = &unidata[c / UNICODE_MODULUS][c % UNICODE_MODULUS]; + return ud->gc; + } else + return unicode_gc_Cn; +} + +/** @brief Check Grapheme_Cluster_Break property + * @param c Code point + * @return 1 if @p c is Zl, Zp, Cc, or Cf other than U+200C/U+200D, 0 otherwise + */ +static int utf32__is_control_or_cr_or_lf(uint32_t c) { + switch(utf32__general_category(c)) { + default: + return 0; + case unicode_gc_Zl: + case unicode_gc_Zp: + case unicode_gc_Cc: + return 1; + case unicode_gc_Cf: + if(c == 0x200C || c == 0x200D) + return 0; + return 1; + } +} + +#define Hangul_Syllable_Type_NA 0 +#define Hangul_Syllable_Type_L 0x1100 +#define Hangul_Syllable_Type_V 0x1160 +#define Hangul_Syllable_Type_T 0x11A8 +#define Hangul_Syllable_Type_LV 0xAC00 +#define Hangul_Syllable_Type_LVT 0xAC01 + +/** @brief Determine Hangul_Syllable_Type of @p c + * @param c Code point + * @return Equivalence class of @p c, or Hangul_Syllable_Type_NA + * + * If this is a Hangul character then a representative member of its + * equivalence class is returned. Otherwise Hangul_Syllable_Type_NA is + * returned. 
+ */ +static uint32_t utf32__hangul_syllable_type(uint32_t c) { + /* Dispose of the bulk of the non-Hangul code points first */ + if(c < 0x1100) return Hangul_Syllable_Type_NA; + if(c > 0x1200 && c < 0xAC00) return Hangul_Syllable_Type_NA; + if(c >= 0xD800) return Hangul_Syllable_Type_NA; + /* Now we pick out the assigned Hangul code points */ + if((c >= 0x1100 && c <= 0x1159) || c == 0x115F) return Hangul_Syllable_Type_L; + if(c >= 0x1160 && c <= 0x11A2) return Hangul_Syllable_Type_V; + if(c >= 0x11A8 && c <= 0x11F9) return Hangul_Syllable_Type_T; + if(c >= 0xAC00 && c <= 0xD7A3) { + if(c % 28 == 16) /* 0xAC00 % 28 == 16, so this tests (c - 0xAC00) % 28 == 0 */ + return Hangul_Syllable_Type_LV; + else + return Hangul_Syllable_Type_LVT; + } + return Hangul_Syllable_Type_NA; +} + +/** @brief Identify a grapheme cluster boundary + * @param s Start of string (must be NFD) + * @param ns Length of string (in code points) + * @param n Index within string (in [0,ns].) + * @return 1 at a grapheme cluster boundary, 0 otherwise + * + * This function identifies default grapheme cluster boundaries as described in + * UAX #29 s3. It returns 1 if @p n points at the code point just after a + * grapheme cluster boundary (including the hypothetical code point just after + * the end of the string). + * + * The string must be in NFD (or NFKD) for this function to work (currently). 
+ */ +int utf32_is_gcb(const uint32_t *s, size_t ns, size_t n) { + uint32_t before, after; + uint32_t hbefore, hafter; + /* GB1 and GB2: always a boundary at the start and end of the string */ + if(n == 0 || n == ns) + return 1; + /* Now we know that s[n-1] and s[n] are safe to inspect */ + /* GB3: never break between CR and LF */ + before = s[n-1]; + after = s[n]; + if(before == 0x000D && after == 0x000A) + return 0; + /* GB4 and GB5: otherwise always break around controls */ + if(utf32__is_control_or_cr_or_lf(before) + || utf32__is_control_or_cr_or_lf(after)) + return 1; + hbefore = utf32__hangul_syllable_type(before); + hafter = utf32__hangul_syllable_type(after); + /* GB6: L x (L | V | LV | LVT); L followed by T is NOT joined */ + if(hbefore == Hangul_Syllable_Type_L + && hafter != Hangul_Syllable_Type_NA && hafter != Hangul_Syllable_Type_T) + return 0; + /* GB7: (LV | V) x (V | T) */ + if((hbefore == Hangul_Syllable_Type_LV + || hbefore == Hangul_Syllable_Type_V) + && (hafter == Hangul_Syllable_Type_V + || hafter == Hangul_Syllable_Type_T)) + return 0; + /* GB8: (LVT | T) x T */ + if((hbefore == Hangul_Syllable_Type_LVT + || hbefore == Hangul_Syllable_Type_T) + && hafter == Hangul_Syllable_Type_T) + return 0; + /* GB9: never break before a code point flagged unicode_grapheme_break_extend */ + if(after < UNICODE_NCHARS + && (unidata[after / UNICODE_MODULUS][after % UNICODE_MODULUS].flags + & unicode_grapheme_break_extend)) + return 0; + /* GB10: break everywhere else */ + return 1; +} + /*@}*/ /** @defgroup Functions that operate on UTF-8 strings */ /*@{*/ diff --git a/lib/unicode.h b/lib/unicode.h index ae7bb4e..bf69cc3 100644 --- a/lib/unicode.h +++ b/lib/unicode.h @@ -42,6 +42,7 @@ char *utf8_casefold_canon(const char *s, size_t ns, size_t *ndp); uint32_t *utf32_casefold_compat(const uint32_t *s, size_t ns, size_t *ndp); char *utf8_casefold_compat(const char *s, size_t ns, size_t *ndp); +int utf32_is_gcb(const uint32_t *s, size_t ns, size_t n); #endif /* UNICODE_H */ diff --git a/lib/unidata.c b/lib/unidata.c index 835113c..a73b159 100644 --- a/lib/unidata.c +++ b/lib/unidata.c @@ -5379,118 +5379,118 @@ static const struct unidata st5[] = { {0,0,0,0,0,0,Sk,0} }; static const struct unidata st6[] = { -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, 
-{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,232,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,232,Mn,0}, -{0,0,0,0,0,216,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,202,Mn,0}, -{0,0,0,0,0,202,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,202,Mn,0}, -{0,0,0,0,0,202,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,1,Mn,0}, -{0,0,0,0,0,1,Mn,0}, -{0,0,0,0,0,1,Mn,0}, -{0,0,0,0,0,1,Mn,0}, -{0,0,0,0,0,1,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{cd303,cd303,0,0,0,230,Mn,0}, -{cd304,cd304,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{cd305,cd305,0,0,0,230,Mn,0}, -{cd306,cd306,0,0,0,230,Mn,0}, -{0,0,cf226,84,0,240,Mn,unicode_normalize_before_casefold}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, 
-{0,0,0,0,0,232,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,233,Mn,0}, -{0,0,0,0,0,234,Mn,0}, -{0,0,0,0,0,234,Mn,0}, -{0,0,0,0,0,233,Mn,0}, -{0,0,0,0,0,234,Mn,0}, -{0,0,0,0,0,234,Mn,0}, -{0,0,0,0,0,233,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,232,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,232,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,216,Mn,unicode_grapheme_break_extend}, 
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,202,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,202,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,202,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,202,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{cd303,cd303,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{cd304,cd304,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, 
+{cd305,cd305,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{cd306,cd306,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,cf226,84,0,240,Mn,unicode_normalize_before_casefold|unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,232,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,233,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,234,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,234,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,233,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,234,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,234,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,233,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, 
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -5772,13 +5772,13 @@ static const struct unidata st9[] = { {0,0,cf342,0,1,0,Lu,0}, {0,0,0,-1,0,0,Ll,0}, {0,0,0,0,0,0,So,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Me,0}, -{0,0,0,0,0,0,Me,0}, +{0,0,0,0,0,0,Me,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Me,unicode_grapheme_break_extend}, {0,0,cf343,0,1,0,Lu,0}, {0,0,0,-1,0,0,Ll,0}, {0,0,cf344,0,1,0,Lu,0}, @@ -6046,61 +6046,61 @@ static const struct unidata st11[] = { {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,222,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,222,Mn,0}, -{0,0,0,0,0,228,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,10,Mn,0}, -{0,0,0,0,0,11,Mn,0}, -{0,0,0,0,0,12,Mn,0}, -{0,0,0,0,0,13,Mn,0}, 
-{0,0,0,0,0,14,Mn,0}, -{0,0,0,0,0,15,Mn,0}, -{0,0,0,0,0,16,Mn,0}, -{0,0,0,0,0,17,Mn,0}, -{0,0,0,0,0,18,Mn,0}, -{0,0,0,0,0,19,Mn,0}, -{0,0,0,0,0,19,Mn,0}, -{0,0,0,0,0,20,Mn,0}, -{0,0,0,0,0,21,Mn,0}, -{0,0,0,0,0,22,Mn,0}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,222,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,222,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,228,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,10,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,11,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,12,Mn,unicode_grapheme_break_extend}, 
+{0,0,0,0,0,13,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,14,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,15,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,16,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,17,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,18,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,19,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,19,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,20,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,21,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,22,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Po,0}, -{0,0,0,0,0,23,Mn,0}, +{0,0,0,0,0,23,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Po,0}, -{0,0,0,0,0,24,Mn,0}, -{0,0,0,0,0,25,Mn,0}, +{0,0,0,0,0,24,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,25,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Po,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Po,0}, -{0,0,0,0,0,18,Mn,0}, +{0,0,0,0,0,18,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -6175,12 +6175,12 @@ static const struct unidata st12[] = { {0,0,0,0,0,0,Po,0}, {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,So,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -6234,26 +6234,26 @@ static const struct unidata st12[] = { {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, -{0,0,0,0,0,27,Mn,0}, -{0,0,0,0,0,28,Mn,0}, -{0,0,0,0,0,29,Mn,0}, -{0,0,0,0,0,30,Mn,0}, -{0,0,0,0,0,31,Mn,0}, 
-{0,0,0,0,0,32,Mn,0}, -{0,0,0,0,0,33,Mn,0}, -{0,0,0,0,0,34,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, +{0,0,0,0,0,27,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,28,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,29,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,30,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,31,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,32,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,33,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,34,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Nd,0}, {0,0,0,0,0,0,Nd,0}, @@ -6271,7 +6271,7 @@ static const struct unidata st12[] = { {0,0,0,0,0,0,Po,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, -{0,0,0,0,0,35,Mn,0}, +{0,0,0,0,0,35,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, @@ -6375,30 +6375,30 @@ static const struct unidata st13[] = { {cd409,cd409,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Po,0}, {0,0,0,0,0,0,Lo,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, 
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cf,0}, -{0,0,0,0,0,0,Me,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, +{0,0,0,0,0,0,Me,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Lm,0}, {0,0,0,0,0,0,Lm,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,So,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Nd,0}, @@ -6436,7 +6436,7 @@ static const struct unidata st14[] = { {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cf,0}, {0,0,0,0,0,0,Lo,0}, -{0,0,0,0,0,36,Mn,0}, +{0,0,0,0,0,36,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, @@ -6467,33 +6467,33 @@ static const struct unidata st14[] = { {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, 
-{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Lo,0}, @@ -6587,17 +6587,17 @@ static const struct unidata st15[] = { {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, 
{0,0,0,0,0,0,Lo,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -6656,15 +6656,15 @@ static const struct unidata st15[] = { {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Lm,0}, {0,0,0,0,0,0,Lm,0}, {0,0,0,0,0,0,So,0}, @@ -6810,8 +6810,8 @@ static const struct unidata st16[] = { }; static const struct unidata st17[] = { {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0}, 
{0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, @@ -6869,31 +6869,31 @@ static const struct unidata st17[] = { {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,7,Mn,0}, +{0,0,0,0,0,7,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,9,Mn,0}, +{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Lo,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -6907,8 +6907,8 @@ static const struct unidata st17[] = { {cd420,cd420,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Po,0}, {0,0,0,0,0,0,Po,0}, {0,0,0,0,0,0,Nd,0}, @@ -6940,7 +6940,7 @@ static const struct unidata st17[] = { }; static const struct unidata st18[] = { {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0}, 
{0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Cn,0}, @@ -6999,15 +6999,15 @@ static const struct unidata st18[] = { {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,7,Mn,0}, +{0,0,0,0,0,7,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Lo,0}, +{0,0,0,0,0,0,Mc,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Mc,0}, @@ -7016,7 +7016,7 @@ static const struct unidata st18[] = { {0,0,0,0,0,0,Cn,0}, {cd421,cd421,0,0,0,0,Mc,0}, {cd422,cd422,0,0,0,0,Mc,0}, -{0,0,0,0,0,9,Mn,0}, +{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -7026,7 +7026,7 @@ static const struct unidata st18[] = { {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mc,0}, +{0,0,0,0,0,0,Mc,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -7037,8 +7037,8 @@ static const struct unidata st18[] = { {cd425,cd425,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Nd,0}, @@ -7070,8 +7070,8 @@ static const struct unidata st18[] = { }; static const struct unidata st19[] = { {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Lo,0}, @@ -7129,24 +7129,24 @@ static const struct unidata st19[] = { {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, 
-{0,0,0,0,0,7,Mn,0}, +{0,0,0,0,0,7,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,9,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -7181,8 +7181,8 @@ static const struct unidata st19[] = { {0,0,0,0,0,0,Nd,0}, {0,0,0,0,0,0,Nd,0}, {0,0,0,0,0,0,Nd,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, @@ -7200,8 +7200,8 @@ static const struct unidata st19[] = { }; static const struct unidata st20[] = { {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Lo,0}, @@ -7259,24 +7259,24 @@ static const struct unidata st20[] = { {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,7,Mn,0}, +{0,0,0,0,0,7,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, 
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Cn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,9,Mn,0}, +{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Lo,0}, @@ -7297,8 +7297,8 @@ static const struct unidata st20[] = { {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Nd,0}, @@ -7330,7 +7330,7 @@ static const struct unidata st20[] = { }; static const struct unidata st21[] = { {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Cn,0}, @@ -7389,14 +7389,14 @@ static const struct unidata st21[] = { {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,7,Mn,0}, +{0,0,0,0,0,7,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Lo,0}, +{0,0,0,0,0,0,Mc,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -7406,7 +7406,7 @@ static const struct unidata st21[] = { {0,0,0,0,0,0,Cn,0}, {cd433,cd433,0,0,0,0,Mc,0}, {cd434,cd434,0,0,0,0,Mc,0}, -{0,0,0,0,0,9,Mn,0}, +{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, 
{0,0,0,0,0,0,Cn,0}, @@ -7415,8 +7415,8 @@ static const struct unidata st21[] = { {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mc,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mc,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -7461,7 +7461,7 @@ static const struct unidata st21[] = { static const struct unidata st22[] = { {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Lo,0}, @@ -7521,9 +7521,9 @@ static const struct unidata st22[] = { {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, +{0,0,0,0,0,0,Mc,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Cn,0}, @@ -7536,7 +7536,7 @@ static const struct unidata st22[] = { {cd438,cd438,0,0,0,0,Mc,0}, {cd439,cd439,0,0,0,0,Mc,0}, {cd440,cd440,0,0,0,0,Mc,0}, -{0,0,0,0,0,9,Mn,0}, +{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -7546,7 +7546,7 @@ static const struct unidata st22[] = { {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mc,0}, +{0,0,0,0,0,0,Mc,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -7651,22 +7651,22 @@ static const struct unidata st23[] = { {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{cd441,cd441,0,0,0,0,Mn,0}, 
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{cd441,cd441,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,9,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -7674,8 +7674,8 @@ static const struct unidata st23[] = { {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,84,Mn,0}, -{0,0,0,0,0,91,Mn,0}, +{0,0,0,0,0,84,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,91,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -7779,24 +7779,24 @@ static const struct unidata st24[] = { {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,7,Mn,0}, +{0,0,0,0,0,7,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {cd442,cd442,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mc,0}, +{0,0,0,0,0,0,Mc,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {cd443,cd443,0,0,0,0,Mc,0}, {cd444,cd444,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Cn,0}, {cd445,cd445,0,0,0,0,Mc,0}, {cd446,cd446,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,9,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -7804,8 +7804,8 @@ static const struct unidata st24[] = { {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mc,0}, +{0,0,0,0,0,0,Mc,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mc,unicode_grapheme_break_extend}, 
{0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -7817,8 +7817,8 @@ static const struct unidata st24[] = { {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Nd,0}, @@ -7911,12 +7911,12 @@ static const struct unidata st25[] = { {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, +{0,0,0,0,0,0,Mc,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Mc,0}, @@ -7926,7 +7926,7 @@ static const struct unidata st25[] = { {cd447,cd447,0,0,0,0,Mc,0}, {cd448,cd448,0,0,0,0,Mc,0}, {cd449,cd449,0,0,0,0,Mc,0}, -{0,0,0,0,0,9,Mn,0}, +{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -7936,7 +7936,7 @@ static const struct unidata st25[] = { {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mc,0}, +{0,0,0,0,0,0,Mc,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -8053,19 +8053,19 @@ static const struct unidata st26[] = { {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,9,Mn,0}, +{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, +{0,0,0,0,0,0,Mc,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, 
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, @@ -8074,7 +8074,7 @@ static const struct unidata st26[] = { {cd451,cd451,0,0,0,0,Mc,0}, {cd452,cd452,0,0,0,0,Mc,0}, {cd453,cd453,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mc,0}, +{0,0,0,0,0,0,Mc,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -8158,16 +8158,16 @@ static const struct unidata st27[] = { {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Lo,0}, {cd454,0,0,0,0,0,Lo,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,103,Mn,0}, -{0,0,0,0,0,103,Mn,0}, -{0,0,0,0,0,9,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,103,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,103,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -8180,14 +8180,14 @@ static const struct unidata st27[] = { {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lm,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,107,Mn,0}, -{0,0,0,0,0,107,Mn,0}, -{0,0,0,0,0,107,Mn,0}, -{0,0,0,0,0,107,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,107,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,107,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,107,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,107,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Po,0}, 
{0,0,0,0,0,0,Nd,0}, {0,0,0,0,0,0,Nd,0}, @@ -8288,18 +8288,18 @@ static const struct unidata st28[] = { {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Lo,0}, {cd455,0,0,0,0,0,Lo,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,118,Mn,0}, -{0,0,0,0,0,118,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,118,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,118,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -8311,12 +8311,12 @@ static const struct unidata st28[] = { {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Lm,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,122,Mn,0}, -{0,0,0,0,0,122,Mn,0}, -{0,0,0,0,0,122,Mn,0}, -{0,0,0,0,0,122,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,122,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,122,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,122,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,122,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Nd,0}, @@ -8393,8 +8393,8 @@ static const struct unidata st29[] = { {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,So,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,So,0}, @@ -8422,11 +8422,11 @@ static const struct unidata st29[] = { {0,0,0,0,0,0,No,0}, {0,0,0,0,0,0,No,0}, {0,0,0,0,0,0,So,0}, 
-{0,0,0,0,0,220,Mn,0}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,So,0}, -{0,0,0,0,0,220,Mn,0}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,So,0}, -{0,0,0,0,0,216,Mn,0}, +{0,0,0,0,0,216,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Ps,0}, {0,0,0,0,0,0,Pe,0}, {0,0,0,0,0,0,Ps,0}, @@ -8482,31 +8482,31 @@ static const struct unidata st29[] = { {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,129,Mn,0}, -{0,0,0,0,0,130,Mn,0}, -{cd465,cd465,0,0,0,0,Mn,0}, -{0,0,0,0,0,132,Mn,0}, -{cd466,cd466,0,0,0,0,Mn,0}, -{cd467,cd467,0,0,0,0,Mn,0}, -{cd468,0,0,0,0,0,Mn,0}, -{cd469,cd469,0,0,0,0,Mn,0}, -{cd470,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,130,Mn,0}, -{0,0,0,0,0,130,Mn,0}, -{0,0,0,0,0,130,Mn,0}, -{0,0,0,0,0,130,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,129,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,130,Mn,unicode_grapheme_break_extend}, +{cd465,cd465,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,132,Mn,unicode_grapheme_break_extend}, +{cd466,cd466,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{cd467,cd467,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{cd468,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{cd469,cd469,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{cd470,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,130,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,130,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,130,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,130,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0} }; static const struct unidata st30[] = { -{0,0,0,0,0,130,Mn,0}, -{cd471,cd471,0,0,0,0,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,9,Mn,0}, +{0,0,0,0,0,130,Mn,unicode_grapheme_break_extend}, +{cd471,cd471,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Po,0}, 
-{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, @@ -8515,51 +8515,51 @@ static const struct unidata st30[] = { {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{cd472,cd472,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{cd472,cd472,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{cd473,cd473,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{cd474,cd474,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{cd475,cd475,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{cd476,cd476,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{cd477,cd477,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{cd473,cd473,0,0,0,0,Mn,unicode_grapheme_break_extend}, 
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{cd474,cd474,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{cd475,cd475,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{cd476,cd476,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{cd477,cd477,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,So,0}, @@ -8569,7 +8569,7 @@ static const struct unidata st30[] = { {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,So,0}, -{0,0,0,0,0,220,Mn,0}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,So,0}, @@ -8674,19 +8674,19 @@ static const struct unidata st31[] = { {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Mc,0}, 
-{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,7,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,7,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,9,Mn,0}, +{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -8717,8 +8717,8 @@ static const struct unidata st31[] = { {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -9504,7 +9504,7 @@ static const struct unidata st37[] = { {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,230,Mn,0}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,Po,0}, {0,0,0,0,0,0,Po,0}, @@ -10207,9 +10207,9 @@ static const struct unidata st43[] = { {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,9,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -10239,9 +10239,9 @@ static const struct unidata st43[] = { {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,9,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, 
+{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Po,0}, {0,0,0,0,0,0,Po,0}, {0,0,0,0,0,0,Cn,0}, @@ -10271,8 +10271,8 @@ static const struct unidata st43[] = { {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -10303,8 +10303,8 @@ static const struct unidata st43[] = { {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -10374,13 +10374,13 @@ static const struct unidata st44[] = { {0,0,0,0,0,0,Cf,0}, {0,0,0,0,0,0,Cf,0}, {0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, @@ -10389,20 +10389,20 @@ static const struct unidata st44[] = { {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,9,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, 
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Po,0}, {0,0,0,0,0,0,Po,0}, {0,0,0,0,0,0,Po,0}, @@ -10412,7 +10412,7 @@ static const struct unidata st44[] = { {0,0,0,0,0,0,Po,0}, {0,0,0,0,0,0,Sc,0}, {0,0,0,0,0,0,Lo,0}, -{0,0,0,0,0,230,Mn,0}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Nd,0}, @@ -10460,9 +10460,9 @@ static const struct unidata st45[] = { {0,0,0,0,0,0,Po,0}, {0,0,0,0,0,0,Po,0}, {0,0,0,0,0,0,Po,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Zs,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Nd,0}, @@ -10620,7 +10620,7 @@ static const struct unidata st46[] = { {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, -{0,0,0,0,0,228,Mn,0}, +{0,0,0,0,0,228,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -10741,15 +10741,15 @@ static const struct unidata st47[] = { {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0}, 
{0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, @@ -10759,16 +10759,16 @@ static const struct unidata st47[] = { {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,222,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, +{0,0,0,0,0,222,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -10992,8 +10992,8 @@ static const struct unidata st49[] = { {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, @@ -11099,10 +11099,10 @@ static const struct unidata st49[] = { {0,0,0,0,0,0,Cn,0} }; static const struct unidata st50[] = { -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Lo,0}, {cd480,cd480,0,0,0,0,Lo,0}, @@ -11151,21 +11151,21 @@ static const struct unidata st50[] = { {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, -{0,0,0,0,0,7,Mn,0}, +{0,0,0,0,0,7,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, 
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {cd486,cd486,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {cd487,cd487,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, {cd488,cd488,0,0,0,0,Mc,0}, {cd489,cd489,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {cd490,cd490,0,0,0,0,Mc,0}, {0,0,0,0,0,9,Mc,0}, {0,0,0,0,0,0,Lo,0}, @@ -11206,15 +11206,15 @@ static const struct unidata st50[] = { {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,So,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,So,0}, @@ -11423,17 +11423,17 @@ static const struct unidata st52[] = { {cd573,0,0,0,0,0,Lm,0}, {cd574,0,0,0,0,0,Lm,0}, {cd333,0,0,0,0,0,Lm,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, 
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -11485,8 +11485,8 @@ static const struct unidata st52[] = { {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0} +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend} }; static const struct unidata st53[] = { {cd575,cd575,cf489,0,1,0,Lu,0}, @@ -12021,8 +12021,8 @@ static const struct unidata st57[] = { {cd0,0,0,0,0,0,Zs,0}, {cd0,0,0,0,0,0,Zs,0}, {0,0,0,0,0,0,Cf,0}, -{0,0,0,0,0,0,Cf,0}, -{0,0,0,0,0,0,Cf,0}, +{0,0,0,0,0,0,Cf,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Cf,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cf,0}, {0,0,0,0,0,0,Cf,0}, {0,0,0,0,0,0,Pd,0}, @@ -12219,38 +12219,38 @@ static const struct unidata st58[] = { {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,1,Mn,0}, -{0,0,0,0,0,1,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,1,Mn,0}, -{0,0,0,0,0,1,Mn,0}, -{0,0,0,0,0,1,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,0,Me,0}, -{0,0,0,0,0,0,Me,0}, -{0,0,0,0,0,0,Me,0}, -{0,0,0,0,0,0,Me,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,0,Me,0}, -{0,0,0,0,0,0,Me,0}, -{0,0,0,0,0,0,Me,0}, -{0,0,0,0,0,1,Mn,0}, -{0,0,0,0,0,1,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,1,Mn,0}, -{0,0,0,0,0,1,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend}, 
+{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Me,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Me,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Me,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Me,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Me,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Me,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Me,unicode_grapheme_break_extend}, +{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -15821,12 +15821,12 @@ static const struct unidata st86[] = { {0,0,0,0,0,0,Nl,0}, {0,0,0,0,0,0,Nl,0}, {0,0,0,0,0,0,Nl,0}, -{0,0,0,0,0,218,Mn,0}, -{0,0,0,0,0,228,Mn,0}, -{0,0,0,0,0,232,Mn,0}, -{0,0,0,0,0,222,Mn,0}, -{0,0,0,0,0,224,Mn,0}, -{0,0,0,0,0,224,Mn,0}, +{0,0,0,0,0,218,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,228,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,232,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,222,Mn,unicode_grapheme_break_extend}, 
+{0,0,0,0,0,224,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,224,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Pd,0}, {0,0,0,0,0,0,Lm,0}, {0,0,0,0,0,0,Lm,0}, @@ -15934,8 +15934,8 @@ static const struct unidata st87[] = { {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,8,Mn,0}, -{0,0,0,0,0,8,Mn,0}, +{0,0,0,0,0,8,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,8,Mn,unicode_grapheme_break_extend}, {cd1524,0,0,0,0,0,Sk,0}, {cd1525,0,0,0,0,0,Sk,0}, {0,0,0,0,0,0,Lm,0}, @@ -17605,12 +17605,12 @@ static const struct unidata st100[] = { {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, -{0,0,0,0,0,9,Mn,0}, +{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, @@ -17636,8 +17636,8 @@ static const struct unidata st100[] = { {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,Mc,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Mc,0}, {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,So,0}, @@ -19059,7 +19059,7 @@ static const struct unidata st111[] = { {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {cd2543,cd2543,0,0,0,0,Lo,0}, -{0,0,0,0,0,26,Mn,0}, +{0,0,0,0,0,26,Mn,unicode_grapheme_break_extend}, {cd2544,cd2544,0,0,0,0,Lo,0}, {cd2545,0,0,0,0,0,Lo,0}, {cd1102,0,0,0,0,0,Lo,0}, @@ -19809,22 +19809,22 @@ static const struct unidata st116[] = { {0,0,0,0,0,0,Cn,0} }; static const struct unidata st117[] = { -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, 
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {cd2903,0,0,0,0,0,Po,0}, {cd2904,0,0,0,0,0,Po,0}, {cd2905,0,0,0,0,0,Po,0}, @@ -19841,10 +19841,10 @@ static const struct unidata st117[] = { {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, @@ -21630,21 +21630,21 @@ static const struct unidata st130[] = { }; static const struct unidata st131[] = { {0,0,0,0,0,0,Lo,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,0,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,220,Mn,0}, 
-{0,0,0,0,0,0,Mn,0}, -{0,0,0,0,0,230,Mn,0}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, {0,0,0,0,0,0,Lo,0}, @@ -21685,14 +21685,14 @@ static const struct unidata st131[] = { {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,1,Mn,0}, -{0,0,0,0,0,220,Mn,0}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, -{0,0,0,0,0,9,Mn,0}, +{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,No,0}, {0,0,0,0,0,0,No,0}, {0,0,0,0,0,0,No,0}, @@ -22250,20 +22250,20 @@ static const struct unidata st135[] = { {cd3086,cd3086,0,0,0,0,So,0}, {cd3087,cd3087,0,0,0,0,So,0}, {cd3088,cd3088,0,0,0,0,So,0}, +{0,0,0,0,0,216,Mc,unicode_grapheme_break_extend}, {0,0,0,0,0,216,Mc,0}, -{0,0,0,0,0,216,Mc,0}, -{0,0,0,0,0,1,Mn,0}, -{0,0,0,0,0,1,Mn,0}, -{0,0,0,0,0,1,Mn,0}, +{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,So,0}, {0,0,0,0,0,226,Mc,0}, -{0,0,0,0,0,216,Mc,0}, -{0,0,0,0,0,216,Mc,0}, -{0,0,0,0,0,216,Mc,0}, -{0,0,0,0,0,216,Mc,0}, -{0,0,0,0,0,216,Mc,0}, +{0,0,0,0,0,216,Mc,unicode_grapheme_break_extend}, +{0,0,0,0,0,216,Mc,unicode_grapheme_break_extend}, +{0,0,0,0,0,216,Mc,unicode_grapheme_break_extend}, +{0,0,0,0,0,216,Mc,unicode_grapheme_break_extend}, +{0,0,0,0,0,216,Mc,unicode_grapheme_break_extend}, {0,0,0,0,0,0,Cf,0}, {0,0,0,0,0,0,Cf,0}, {0,0,0,0,0,0,Cf,0}, @@ -22272,25 +22272,25 @@ static const struct unidata st135[] = { {0,0,0,0,0,0,Cf,0}, {0,0,0,0,0,0,Cf,0}, {0,0,0,0,0,0,Cf,0}, -{0,0,0,0,0,220,Mn,0}, 
-{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0} +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend} }; static const struct unidata st136[] = { -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,So,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,220,Mn,0}, -{0,0,0,0,0,220,Mn,0}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,So,0}, @@ -22321,10 +22321,10 @@ static const struct unidata st136[] = { {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,So,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,So,0}, @@ -22475,9 +22475,9 @@ static const struct unidata st137[] = { {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,So,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, -{0,0,0,0,0,230,Mn,0}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, 
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, +{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend}, {0,0,0,0,0,0,So,0}, {0,0,0,0,0,0,Cn,0}, {0,0,0,0,0,0,Cn,0}, diff --git a/lib/unidata.h b/lib/unidata.h index 8595754..16ff5fa 100644 --- a/lib/unidata.h +++ b/lib/unidata.h @@ -34,7 +34,8 @@ enum unicode_gc_cat { unicode_gc_Zs }; enum unicode_flags { - unicode_normalize_before_casefold = 1 + unicode_normalize_before_casefold = 1, + unicode_grapheme_break_extend = 2 }; struct unidata { diff --git a/lib/words.c b/lib/words.c index 7d0d779..01c9db2 100644 --- a/lib/words.c +++ b/lib/words.c @@ -41,7 +41,6 @@ const char *casefold(const char *ptr) { static enum unicode_gc_cat cat(uint32_t c) { if(c < UNICODE_NCHARS) { - /* If this a known character, convert it to lower case */ const struct unidata *const ud = &unidata[c / UNICODE_MODULUS][c % UNICODE_MODULUS]; return ud->gc; } else diff --git a/scripts/make-unidata b/scripts/make-unidata index 8f58c08..f04dc30 100755 --- a/scripts/make-unidata +++ b/scripts/make-unidata @@ -44,6 +44,7 @@ # - ... # use strict; +use File::Basename; sub out { print @_ or die "$!\n"; @@ -74,15 +75,17 @@ my $minld = 0; # max/min lower case offset # Unicode standard version to make sure that a given version of DisOrder # supports a given version of Unicode. sub need_input { - my $f = shift; - if(!-e $f) { - system("wget http://www.unicode.org/Public/5.0.0/ucd/$f"); - chmod(0444, $f); + my $path = shift; + my $lpath = basename($path); + if(!-e $lpath) { + system("wget http://www.unicode.org/Public/5.0.0/ucd/$path"); + chmod(0444, $lpath) or die "$lpath: $!\n"; } } need_input("UnicodeData.txt"); need_input("CaseFolding.txt"); +need_input("auxiliary/GraphemeBreakProperty.txt"); # Read the main data file open(STDIN, ") { $max = $c if $c > $max; } +# Grapheme break data +# NB we do this BEFORE filling in blanks so that the Hangul characters +# don't get filled in; we can compute their properties mechanically. 
+open(STDIN, ") { + chomp; + s/\s*\#.*//; + next if $_ eq ''; + my ($range, $propval) = split(/\s*;\s*/, $_); + if($range =~ /(.*)\.\.(.*)/) { + for my $c (hex($1) .. hex($2)) { + if(exists $data{$c}) { + $data{$c}->{gbreak} = $propval; + } + } + } else { + my $c = hex($range); + if(exists $data{$c}) { + $data{$c}->{gbreak} = $propval; + } + } +} + # Round up the maximum value to a whole number of subtables $max += ($modulus - 1) - ($max % $modulus); @@ -193,7 +219,8 @@ out("enum unicode_gc_cat {\n", map(" unicode_gc_$_", sort keys %cats)), "\n};\n"); out("enum unicode_flags {\n", - " unicode_normalize_before_casefold = 1\n", + " unicode_normalize_before_casefold = 1,\n", + " unicode_grapheme_break_extend = 2\n", "};\n", "\n"); @@ -317,9 +344,18 @@ for(my $base = 0; $base <= $max; $base += $modulus) { my $canonsym = ($data{$c}->{canonsym} or "0"); my $compatsym = ($data{$c}->{compatsym} or "0"); my $cfsym = ($data{$c}->{cfsym} or "0"); - my $flags = ($data{$c}->{ypogegrammeni} - ? "unicode_normalize_before_casefold" - : 0); + my @flags = (); + if($data{$c}->{ypogegrammeni}) { + push(@flags, "unicode_normalize_before_casefold"); + } + # Currently we only store the Extend class, using a bit that would + # otherwise be wasted. The other classes are readily computable. + # If there is a convenient way to compute Extend at runtime I have + # yet to discover it. + if(exists $data{$c}->{gbreak} and $data{$c}->{gbreak} eq 'Extend') { + push(@flags, "unicode_grapheme_break_extend"); + } + my $flags = @flags ? join("|", @flags) : 0; push(@t, "{". join(",", $compatsym,