chiark / gitweb /
untested grapheme cluster boundary detection
author Richard Kettlewell <rjk@greenend.org.uk>
Sat, 17 Nov 2007 22:01:22 +0000 (22:01 +0000)
committer Richard Kettlewell <rjk@greenend.org.uk>
Sat, 17 Nov 2007 22:01:22 +0000 (22:01 +0000)
.bzrignore
lib/unicode.c
lib/unicode.h
lib/unidata.c
lib/unidata.h
lib/words.c
scripts/make-unidata

index 2fbeedd73bc48f29ff07a8f824de05c3d484ea83..8bd29c106e2826cdeff44dd2093035a2db251695 100644 (file)
@@ -120,3 +120,4 @@ disobedience/disobedience.html
 lib/NormalizationTest.txt
 lib/CaseFolding.txt
 lib/UnicodeData.txt
+lib/GraphemeBreakProperty.txt
index 749916ad7a68de6a9ba96e8b4b8cdf680d25b68c..bac9e83c2e3bd9af467b494b6d6a2f2f0408111d 100644 (file)
@@ -21,7 +21,8 @@
  * @brief Unicode support functions
  *
  * Here by UTF-8 and UTF-8 we mean the encoding forms of those names (not the
- * encoding schemes).
+ * encoding schemes).  The primary encoding form is UTF-32 but convenience
+ * wrappers using UTF-8 are provided for a number of functions.
  *
  * The idea is that all the strings that hit the database will be in a
  * particular normalization form, and for the search and tags database
@@ -572,6 +573,125 @@ int utf32_cmp(const uint32_t *a, const uint32_t *b) {
   return *a < *b ? -1 : (*a > *b ? 1 : 0);
 }
 
+/** @brief Return the General_Category value for @p c
+ * @param c Code point
+ * @return General_Category of @p c, or unicode_gc_Cn if @p c >= UNICODE_NCHARS
+ */
+static inline enum unicode_gc_cat utf32__general_category(uint32_t c) {
+  if(c < UNICODE_NCHARS) {
+    const struct unidata *const ud = &unidata[c / UNICODE_MODULUS][c % UNICODE_MODULUS];
+    return ud->gc;
+  } else
+    return unicode_gc_Cn;
+}
+
+/** @brief Test whether @p c counts as Control, CR or LF for grapheme breaking
+ * @param c Code point
+ * @return 1 if General_Category is Zl, Zp, Cc, or Cf other than U+200C/U+200D; else 0
+ */
+static int utf32__is_control_or_cr_or_lf(uint32_t c) {
+  switch(utf32__general_category(c)) {
+  default:
+    return 0;
+  case unicode_gc_Zl:
+  case unicode_gc_Zp:
+  case unicode_gc_Cc:
+    return 1;
+  case unicode_gc_Cf:
+    if(c == 0x200C || c == 0x200D)
+      return 0;
+    return 1;
+  }
+}
+
+#define Hangul_Syllable_Type_NA 0
+#define Hangul_Syllable_Type_L 0x1100
+#define Hangul_Syllable_Type_V 0x1160
+#define Hangul_Syllable_Type_T 0x11A8
+#define Hangul_Syllable_Type_LV 0xAC00
+#define Hangul_Syllable_Type_LVT 0xAC01
+
+/** @brief Determine Hangul_Syllable_Type of @p c
+ * @param c Code point
+ * @return Equivalence class of @p c, or Hangul_Syllable_Type_NA
+ *
+ * If this is a Hangul character then a representative member of its
+ * equivalence class (one of the Hangul_Syllable_Type_* values) is returned.
+ * Otherwise Hangul_Syllable_Type_NA is returned.
+ */
+static uint32_t utf32__hangul_syllable_type(uint32_t c) {
+  /* Dispose of the bulk of the non-Hangul code points first */
+  if(c < 0x1100) return Hangul_Syllable_Type_NA;
+  if(c > 0x1200 && c < 0xAC00) return Hangul_Syllable_Type_NA;
+  if(c >= 0xD800) return Hangul_Syllable_Type_NA;
+  /* Now we pick out the assigned Hangul code points */
+  if((c >= 0x1100 && c <= 0x1159) || c == 0x115F) return Hangul_Syllable_Type_L;
+  if(c >= 0x1160 && c <= 0x11A2) return Hangul_Syllable_Type_V;
+  if(c >= 0x11A8 && c <= 0x11F9) return Hangul_Syllable_Type_T;
+  if(c >= 0xAC00 && c <= 0xD7A3) {
+    if(c % 28 == 16)
+      return Hangul_Syllable_Type_LV;
+    else
+      return Hangul_Syllable_Type_LVT;
+  }
+  return Hangul_Syllable_Type_NA;
+}
+
+/** @brief Identify a grapheme cluster boundary
+ * @param s Start of string (must be NFD)
+ * @param ns Length of string
+ * @param n Index within string (in [0,ns].)
+ * @return 1 at a grapheme cluster boundary, 0 otherwise
+ *
+ * This function identifies default grapheme cluster boundaries as described in
+ * UAX #29 s3.  It returns 1 if @p n points at the code point just after a
+ * grapheme cluster boundary (including the hypothetical code point just after
+ * the end of the string).
+ *
+ * The string must be in NFD (or NFKD) for this function to work (currently).
+ */
+int utf32_is_gcb(const uint32_t *s, size_t ns, size_t n) {
+  uint32_t before, after;
+  uint32_t hbefore, hafter;
+  /* GB1 and GB2: always break at the start and end of the text */
+  if(n == 0 || n == ns)
+    return 1;
+  /* Now we know that s[n-1] and s[n] are safe to inspect */
+  /* GB3: never break between CR and LF */
+  before = s[n-1];
+  after = s[n];
+  if(before == 0x000D && after == 0x000A)
+    return 0;
+  /* GB4 and GB5: otherwise break before and after controls */
+  if(utf32__is_control_or_cr_or_lf(before)
+     || utf32__is_control_or_cr_or_lf(after))
+    return 1;
+  hbefore = utf32__hangul_syllable_type(before);
+  hafter = utf32__hangul_syllable_type(after);
+  /* GB6: L x (L | V | LV | LVT) -- T is excluded, so L followed by T breaks */
+  if(hbefore == Hangul_Syllable_Type_L
+     && hafter != Hangul_Syllable_Type_NA && hafter != Hangul_Syllable_Type_T)
+    return 0;
+  /* GB7: (LV | V) x (V | T) */
+  if((hbefore == Hangul_Syllable_Type_LV
+      || hbefore == Hangul_Syllable_Type_V)
+     && (hafter == Hangul_Syllable_Type_V
+         || hafter == Hangul_Syllable_Type_T))
+    return 0;
+  /* GB8: (LVT | T) x T */
+  if((hbefore == Hangul_Syllable_Type_LVT
+      || hbefore == Hangul_Syllable_Type_T)
+     && hafter == Hangul_Syllable_Type_T)
+    return 0;
+  /* GB9: never break before a character flagged unicode_grapheme_break_extend */
+  if(after < UNICODE_NCHARS
+     && (unidata[after / UNICODE_MODULUS][after % UNICODE_MODULUS].flags
+         & unicode_grapheme_break_extend))
+    return 0;
+  /* GB10: otherwise break everywhere */
+  return 1;
+}
+
 /*@}*/
 /** @defgroup Functions that operate on UTF-8 strings */
 /*@{*/
index ae7bb4e4793c6e7a2695bbfe7d0001427ad7dd1b..bf69cc3a06f25f7613849f13ba355aec2cb2996d 100644 (file)
@@ -42,6 +42,7 @@ char *utf8_casefold_canon(const char *s, size_t ns, size_t *ndp);
 uint32_t *utf32_casefold_compat(const uint32_t *s, size_t ns, size_t *ndp);
 char *utf8_casefold_compat(const char *s, size_t ns, size_t *ndp);
 
+int utf32_is_gcb(const uint32_t *s, size_t ns, size_t n);
 
 #endif /* UNICODE_H */
 
index 835113c8e369b85e5f407ba2c8bd9b84b1e29ec3..a73b159a08b31d061f0c71ab09060b375d0b8d13 100644 (file)
@@ -5379,118 +5379,118 @@ static const struct unidata st5[] = {
 {0,0,0,0,0,0,Sk,0}
 };
 static const struct unidata st6[] = {
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,232,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,232,Mn,0},
-{0,0,0,0,0,216,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,202,Mn,0},
-{0,0,0,0,0,202,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,202,Mn,0},
-{0,0,0,0,0,202,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,1,Mn,0},
-{0,0,0,0,0,1,Mn,0},
-{0,0,0,0,0,1,Mn,0},
-{0,0,0,0,0,1,Mn,0},
-{0,0,0,0,0,1,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{cd303,cd303,0,0,0,230,Mn,0},
-{cd304,cd304,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{cd305,cd305,0,0,0,230,Mn,0},
-{cd306,cd306,0,0,0,230,Mn,0},
-{0,0,cf226,84,0,240,Mn,unicode_normalize_before_casefold},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,232,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,233,Mn,0},
-{0,0,0,0,0,234,Mn,0},
-{0,0,0,0,0,234,Mn,0},
-{0,0,0,0,0,233,Mn,0},
-{0,0,0,0,0,234,Mn,0},
-{0,0,0,0,0,234,Mn,0},
-{0,0,0,0,0,233,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,232,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,232,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,216,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,202,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,202,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,202,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,202,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{cd303,cd303,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{cd304,cd304,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{cd305,cd305,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{cd306,cd306,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,cf226,84,0,240,Mn,unicode_normalize_before_casefold|unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,232,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,233,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,234,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,234,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,233,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,234,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,234,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,233,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -5772,13 +5772,13 @@ static const struct unidata st9[] = {
 {0,0,cf342,0,1,0,Lu,0},
 {0,0,0,-1,0,0,Ll,0},
 {0,0,0,0,0,0,So,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Me,0},
-{0,0,0,0,0,0,Me,0},
+{0,0,0,0,0,0,Me,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Me,unicode_grapheme_break_extend},
 {0,0,cf343,0,1,0,Lu,0},
 {0,0,0,-1,0,0,Ll,0},
 {0,0,cf344,0,1,0,Lu,0},
@@ -6046,61 +6046,61 @@ static const struct unidata st11[] = {
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,222,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,222,Mn,0},
-{0,0,0,0,0,228,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,10,Mn,0},
-{0,0,0,0,0,11,Mn,0},
-{0,0,0,0,0,12,Mn,0},
-{0,0,0,0,0,13,Mn,0},
-{0,0,0,0,0,14,Mn,0},
-{0,0,0,0,0,15,Mn,0},
-{0,0,0,0,0,16,Mn,0},
-{0,0,0,0,0,17,Mn,0},
-{0,0,0,0,0,18,Mn,0},
-{0,0,0,0,0,19,Mn,0},
-{0,0,0,0,0,19,Mn,0},
-{0,0,0,0,0,20,Mn,0},
-{0,0,0,0,0,21,Mn,0},
-{0,0,0,0,0,22,Mn,0},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,222,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,222,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,228,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,10,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,11,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,12,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,13,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,14,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,15,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,16,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,17,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,18,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,19,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,19,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,20,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,21,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,22,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Po,0},
-{0,0,0,0,0,23,Mn,0},
+{0,0,0,0,0,23,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Po,0},
-{0,0,0,0,0,24,Mn,0},
-{0,0,0,0,0,25,Mn,0},
+{0,0,0,0,0,24,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,25,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Po,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Po,0},
-{0,0,0,0,0,18,Mn,0},
+{0,0,0,0,0,18,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -6175,12 +6175,12 @@ static const struct unidata st12[] = {
 {0,0,0,0,0,0,Po,0},
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,So,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -6234,26 +6234,26 @@ static const struct unidata st12[] = {
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
-{0,0,0,0,0,27,Mn,0},
-{0,0,0,0,0,28,Mn,0},
-{0,0,0,0,0,29,Mn,0},
-{0,0,0,0,0,30,Mn,0},
-{0,0,0,0,0,31,Mn,0},
-{0,0,0,0,0,32,Mn,0},
-{0,0,0,0,0,33,Mn,0},
-{0,0,0,0,0,34,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
+{0,0,0,0,0,27,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,28,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,29,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,30,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,31,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,32,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,33,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,34,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Nd,0},
 {0,0,0,0,0,0,Nd,0},
@@ -6271,7 +6271,7 @@ static const struct unidata st12[] = {
 {0,0,0,0,0,0,Po,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
-{0,0,0,0,0,35,Mn,0},
+{0,0,0,0,0,35,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
@@ -6375,30 +6375,30 @@ static const struct unidata st13[] = {
 {cd409,cd409,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Po,0},
 {0,0,0,0,0,0,Lo,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cf,0},
-{0,0,0,0,0,0,Me,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
+{0,0,0,0,0,0,Me,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Lm,0},
 {0,0,0,0,0,0,Lm,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,So,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Nd,0},
@@ -6436,7 +6436,7 @@ static const struct unidata st14[] = {
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cf,0},
 {0,0,0,0,0,0,Lo,0},
-{0,0,0,0,0,36,Mn,0},
+{0,0,0,0,0,36,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
@@ -6467,33 +6467,33 @@ static const struct unidata st14[] = {
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Lo,0},
@@ -6587,17 +6587,17 @@ static const struct unidata st15[] = {
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -6656,15 +6656,15 @@ static const struct unidata st15[] = {
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Lm,0},
 {0,0,0,0,0,0,Lm,0},
 {0,0,0,0,0,0,So,0},
@@ -6810,8 +6810,8 @@ static const struct unidata st16[] = {
 };
 static const struct unidata st17[] = {
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
@@ -6869,31 +6869,31 @@ static const struct unidata st17[] = {
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,7,Mn,0},
+{0,0,0,0,0,7,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,9,Mn,0},
+{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Lo,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -6907,8 +6907,8 @@ static const struct unidata st17[] = {
 {cd420,cd420,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Po,0},
 {0,0,0,0,0,0,Po,0},
 {0,0,0,0,0,0,Nd,0},
@@ -6940,7 +6940,7 @@ static const struct unidata st17[] = {
 };
 static const struct unidata st18[] = {
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Cn,0},
@@ -6999,15 +6999,15 @@ static const struct unidata st18[] = {
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,7,Mn,0},
+{0,0,0,0,0,7,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Lo,0},
+{0,0,0,0,0,0,Mc,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Mc,0},
@@ -7016,7 +7016,7 @@ static const struct unidata st18[] = {
 {0,0,0,0,0,0,Cn,0},
 {cd421,cd421,0,0,0,0,Mc,0},
 {cd422,cd422,0,0,0,0,Mc,0},
-{0,0,0,0,0,9,Mn,0},
+{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -7026,7 +7026,7 @@ static const struct unidata st18[] = {
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mc,0},
+{0,0,0,0,0,0,Mc,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -7037,8 +7037,8 @@ static const struct unidata st18[] = {
 {cd425,cd425,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Nd,0},
@@ -7070,8 +7070,8 @@ static const struct unidata st18[] = {
 };
 static const struct unidata st19[] = {
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Lo,0},
@@ -7129,24 +7129,24 @@ static const struct unidata st19[] = {
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,7,Mn,0},
+{0,0,0,0,0,7,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,9,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -7181,8 +7181,8 @@ static const struct unidata st19[] = {
 {0,0,0,0,0,0,Nd,0},
 {0,0,0,0,0,0,Nd,0},
 {0,0,0,0,0,0,Nd,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
@@ -7200,8 +7200,8 @@ static const struct unidata st19[] = {
 };
 static const struct unidata st20[] = {
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Lo,0},
@@ -7259,24 +7259,24 @@ static const struct unidata st20[] = {
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,7,Mn,0},
+{0,0,0,0,0,7,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Cn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,9,Mn,0},
+{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Lo,0},
@@ -7297,8 +7297,8 @@ static const struct unidata st20[] = {
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Nd,0},
@@ -7330,7 +7330,7 @@ static const struct unidata st20[] = {
 };
 static const struct unidata st21[] = {
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Cn,0},
@@ -7389,14 +7389,14 @@ static const struct unidata st21[] = {
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,7,Mn,0},
+{0,0,0,0,0,7,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Lo,0},
+{0,0,0,0,0,0,Mc,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -7406,7 +7406,7 @@ static const struct unidata st21[] = {
 {0,0,0,0,0,0,Cn,0},
 {cd433,cd433,0,0,0,0,Mc,0},
 {cd434,cd434,0,0,0,0,Mc,0},
-{0,0,0,0,0,9,Mn,0},
+{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -7415,8 +7415,8 @@ static const struct unidata st21[] = {
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mc,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mc,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -7461,7 +7461,7 @@ static const struct unidata st21[] = {
 static const struct unidata st22[] = {
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Lo,0},
@@ -7521,9 +7521,9 @@ static const struct unidata st22[] = {
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
+{0,0,0,0,0,0,Mc,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Cn,0},
@@ -7536,7 +7536,7 @@ static const struct unidata st22[] = {
 {cd438,cd438,0,0,0,0,Mc,0},
 {cd439,cd439,0,0,0,0,Mc,0},
 {cd440,cd440,0,0,0,0,Mc,0},
-{0,0,0,0,0,9,Mn,0},
+{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -7546,7 +7546,7 @@ static const struct unidata st22[] = {
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mc,0},
+{0,0,0,0,0,0,Mc,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -7651,22 +7651,22 @@ static const struct unidata st23[] = {
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{cd441,cd441,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{cd441,cd441,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,9,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -7674,8 +7674,8 @@ static const struct unidata st23[] = {
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,84,Mn,0},
-{0,0,0,0,0,91,Mn,0},
+{0,0,0,0,0,84,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,91,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -7779,24 +7779,24 @@ static const struct unidata st24[] = {
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,7,Mn,0},
+{0,0,0,0,0,7,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {cd442,cd442,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mc,0},
+{0,0,0,0,0,0,Mc,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {cd443,cd443,0,0,0,0,Mc,0},
 {cd444,cd444,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Cn,0},
 {cd445,cd445,0,0,0,0,Mc,0},
 {cd446,cd446,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,9,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -7804,8 +7804,8 @@ static const struct unidata st24[] = {
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mc,0},
+{0,0,0,0,0,0,Mc,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mc,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -7817,8 +7817,8 @@ static const struct unidata st24[] = {
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Nd,0},
@@ -7911,12 +7911,12 @@ static const struct unidata st25[] = {
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
+{0,0,0,0,0,0,Mc,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Mc,0},
@@ -7926,7 +7926,7 @@ static const struct unidata st25[] = {
 {cd447,cd447,0,0,0,0,Mc,0},
 {cd448,cd448,0,0,0,0,Mc,0},
 {cd449,cd449,0,0,0,0,Mc,0},
-{0,0,0,0,0,9,Mn,0},
+{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -7936,7 +7936,7 @@ static const struct unidata st25[] = {
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mc,0},
+{0,0,0,0,0,0,Mc,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -8053,19 +8053,19 @@ static const struct unidata st26[] = {
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,9,Mn,0},
+{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
+{0,0,0,0,0,0,Mc,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
@@ -8074,7 +8074,7 @@ static const struct unidata st26[] = {
 {cd451,cd451,0,0,0,0,Mc,0},
 {cd452,cd452,0,0,0,0,Mc,0},
 {cd453,cd453,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mc,0},
+{0,0,0,0,0,0,Mc,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -8158,16 +8158,16 @@ static const struct unidata st27[] = {
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Lo,0},
 {cd454,0,0,0,0,0,Lo,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,103,Mn,0},
-{0,0,0,0,0,103,Mn,0},
-{0,0,0,0,0,9,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,103,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,103,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -8180,14 +8180,14 @@ static const struct unidata st27[] = {
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lm,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,107,Mn,0},
-{0,0,0,0,0,107,Mn,0},
-{0,0,0,0,0,107,Mn,0},
-{0,0,0,0,0,107,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,107,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,107,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,107,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,107,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Po,0},
 {0,0,0,0,0,0,Nd,0},
 {0,0,0,0,0,0,Nd,0},
@@ -8288,18 +8288,18 @@ static const struct unidata st28[] = {
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Lo,0},
 {cd455,0,0,0,0,0,Lo,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,118,Mn,0},
-{0,0,0,0,0,118,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,118,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,118,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -8311,12 +8311,12 @@ static const struct unidata st28[] = {
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Lm,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,122,Mn,0},
-{0,0,0,0,0,122,Mn,0},
-{0,0,0,0,0,122,Mn,0},
-{0,0,0,0,0,122,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,122,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,122,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,122,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,122,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Nd,0},
@@ -8393,8 +8393,8 @@ static const struct unidata st29[] = {
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,So,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,So,0},
@@ -8422,11 +8422,11 @@ static const struct unidata st29[] = {
 {0,0,0,0,0,0,No,0},
 {0,0,0,0,0,0,No,0},
 {0,0,0,0,0,0,So,0},
-{0,0,0,0,0,220,Mn,0},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,So,0},
-{0,0,0,0,0,220,Mn,0},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,So,0},
-{0,0,0,0,0,216,Mn,0},
+{0,0,0,0,0,216,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Ps,0},
 {0,0,0,0,0,0,Pe,0},
 {0,0,0,0,0,0,Ps,0},
@@ -8482,31 +8482,31 @@ static const struct unidata st29[] = {
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,129,Mn,0},
-{0,0,0,0,0,130,Mn,0},
-{cd465,cd465,0,0,0,0,Mn,0},
-{0,0,0,0,0,132,Mn,0},
-{cd466,cd466,0,0,0,0,Mn,0},
-{cd467,cd467,0,0,0,0,Mn,0},
-{cd468,0,0,0,0,0,Mn,0},
-{cd469,cd469,0,0,0,0,Mn,0},
-{cd470,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,130,Mn,0},
-{0,0,0,0,0,130,Mn,0},
-{0,0,0,0,0,130,Mn,0},
-{0,0,0,0,0,130,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,129,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,130,Mn,unicode_grapheme_break_extend},
+{cd465,cd465,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,132,Mn,unicode_grapheme_break_extend},
+{cd466,cd466,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{cd467,cd467,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{cd468,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{cd469,cd469,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{cd470,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,130,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,130,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,130,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,130,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0}
 };
 static const struct unidata st30[] = {
-{0,0,0,0,0,130,Mn,0},
-{cd471,cd471,0,0,0,0,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,9,Mn,0},
+{0,0,0,0,0,130,Mn,unicode_grapheme_break_extend},
+{cd471,cd471,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Po,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
@@ -8515,51 +8515,51 @@ static const struct unidata st30[] = {
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{cd472,cd472,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{cd472,cd472,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{cd473,cd473,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{cd474,cd474,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{cd475,cd475,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{cd476,cd476,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{cd477,cd477,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{cd473,cd473,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{cd474,cd474,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{cd475,cd475,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{cd476,cd476,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{cd477,cd477,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,So,0},
@@ -8569,7 +8569,7 @@ static const struct unidata st30[] = {
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,So,0},
-{0,0,0,0,0,220,Mn,0},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,So,0},
@@ -8674,19 +8674,19 @@ static const struct unidata st31[] = {
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,7,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,7,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,9,Mn,0},
+{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -8717,8 +8717,8 @@ static const struct unidata st31[] = {
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -9504,7 +9504,7 @@ static const struct unidata st37[] = {
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,230,Mn,0},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,Po,0},
 {0,0,0,0,0,0,Po,0},
@@ -10207,9 +10207,9 @@ static const struct unidata st43[] = {
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,9,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -10239,9 +10239,9 @@ static const struct unidata st43[] = {
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,9,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Po,0},
 {0,0,0,0,0,0,Po,0},
 {0,0,0,0,0,0,Cn,0},
@@ -10271,8 +10271,8 @@ static const struct unidata st43[] = {
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -10303,8 +10303,8 @@ static const struct unidata st43[] = {
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -10374,13 +10374,13 @@ static const struct unidata st44[] = {
 {0,0,0,0,0,0,Cf,0},
 {0,0,0,0,0,0,Cf,0},
 {0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
@@ -10389,20 +10389,20 @@ static const struct unidata st44[] = {
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,9,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Po,0},
 {0,0,0,0,0,0,Po,0},
 {0,0,0,0,0,0,Po,0},
@@ -10412,7 +10412,7 @@ static const struct unidata st44[] = {
 {0,0,0,0,0,0,Po,0},
 {0,0,0,0,0,0,Sc,0},
 {0,0,0,0,0,0,Lo,0},
-{0,0,0,0,0,230,Mn,0},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Nd,0},
@@ -10460,9 +10460,9 @@ static const struct unidata st45[] = {
 {0,0,0,0,0,0,Po,0},
 {0,0,0,0,0,0,Po,0},
 {0,0,0,0,0,0,Po,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Zs,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Nd,0},
@@ -10620,7 +10620,7 @@ static const struct unidata st46[] = {
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
-{0,0,0,0,0,228,Mn,0},
+{0,0,0,0,0,228,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -10741,15 +10741,15 @@ static const struct unidata st47[] = {
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
@@ -10759,16 +10759,16 @@ static const struct unidata st47[] = {
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,222,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
+{0,0,0,0,0,222,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -10992,8 +10992,8 @@ static const struct unidata st49[] = {
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
@@ -11099,10 +11099,10 @@ static const struct unidata st49[] = {
 {0,0,0,0,0,0,Cn,0}
 };
 static const struct unidata st50[] = {
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Lo,0},
 {cd480,cd480,0,0,0,0,Lo,0},
@@ -11151,21 +11151,21 @@ static const struct unidata st50[] = {
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
-{0,0,0,0,0,7,Mn,0},
+{0,0,0,0,0,7,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {cd486,cd486,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {cd487,cd487,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
 {cd488,cd488,0,0,0,0,Mc,0},
 {cd489,cd489,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {cd490,cd490,0,0,0,0,Mc,0},
 {0,0,0,0,0,9,Mc,0},
 {0,0,0,0,0,0,Lo,0},
@@ -11206,15 +11206,15 @@ static const struct unidata st50[] = {
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,So,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,So,0},
@@ -11423,17 +11423,17 @@ static const struct unidata st52[] = {
 {cd573,0,0,0,0,0,Lm,0},
 {cd574,0,0,0,0,0,Lm,0},
 {cd333,0,0,0,0,0,Lm,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -11485,8 +11485,8 @@ static const struct unidata st52[] = {
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0}
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}
 };
 static const struct unidata st53[] = {
 {cd575,cd575,cf489,0,1,0,Lu,0},
@@ -12021,8 +12021,8 @@ static const struct unidata st57[] = {
 {cd0,0,0,0,0,0,Zs,0},
 {cd0,0,0,0,0,0,Zs,0},
 {0,0,0,0,0,0,Cf,0},
-{0,0,0,0,0,0,Cf,0},
-{0,0,0,0,0,0,Cf,0},
+{0,0,0,0,0,0,Cf,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Cf,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cf,0},
 {0,0,0,0,0,0,Cf,0},
 {0,0,0,0,0,0,Pd,0},
@@ -12219,38 +12219,38 @@ static const struct unidata st58[] = {
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,1,Mn,0},
-{0,0,0,0,0,1,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,1,Mn,0},
-{0,0,0,0,0,1,Mn,0},
-{0,0,0,0,0,1,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,0,Me,0},
-{0,0,0,0,0,0,Me,0},
-{0,0,0,0,0,0,Me,0},
-{0,0,0,0,0,0,Me,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,0,Me,0},
-{0,0,0,0,0,0,Me,0},
-{0,0,0,0,0,0,Me,0},
-{0,0,0,0,0,1,Mn,0},
-{0,0,0,0,0,1,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,1,Mn,0},
-{0,0,0,0,0,1,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Me,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Me,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Me,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Me,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Me,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Me,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Me,unicode_grapheme_break_extend},
+{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -15821,12 +15821,12 @@ static const struct unidata st86[] = {
 {0,0,0,0,0,0,Nl,0},
 {0,0,0,0,0,0,Nl,0},
 {0,0,0,0,0,0,Nl,0},
-{0,0,0,0,0,218,Mn,0},
-{0,0,0,0,0,228,Mn,0},
-{0,0,0,0,0,232,Mn,0},
-{0,0,0,0,0,222,Mn,0},
-{0,0,0,0,0,224,Mn,0},
-{0,0,0,0,0,224,Mn,0},
+{0,0,0,0,0,218,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,228,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,232,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,222,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,224,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,224,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Pd,0},
 {0,0,0,0,0,0,Lm,0},
 {0,0,0,0,0,0,Lm,0},
@@ -15934,8 +15934,8 @@ static const struct unidata st87[] = {
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,8,Mn,0},
-{0,0,0,0,0,8,Mn,0},
+{0,0,0,0,0,8,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,8,Mn,unicode_grapheme_break_extend},
 {cd1524,0,0,0,0,0,Sk,0},
 {cd1525,0,0,0,0,0,Sk,0},
 {0,0,0,0,0,0,Lm,0},
@@ -17605,12 +17605,12 @@ static const struct unidata st100[] = {
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
-{0,0,0,0,0,9,Mn,0},
+{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
@@ -17636,8 +17636,8 @@ static const struct unidata st100[] = {
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,Mc,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Mc,0},
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,So,0},
@@ -19059,7 +19059,7 @@ static const struct unidata st111[] = {
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {cd2543,cd2543,0,0,0,0,Lo,0},
-{0,0,0,0,0,26,Mn,0},
+{0,0,0,0,0,26,Mn,unicode_grapheme_break_extend},
 {cd2544,cd2544,0,0,0,0,Lo,0},
 {cd2545,0,0,0,0,0,Lo,0},
 {cd1102,0,0,0,0,0,Lo,0},
@@ -19809,22 +19809,22 @@ static const struct unidata st116[] = {
 {0,0,0,0,0,0,Cn,0}
 };
 static const struct unidata st117[] = {
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {cd2903,0,0,0,0,0,Po,0},
 {cd2904,0,0,0,0,0,Po,0},
 {cd2905,0,0,0,0,0,Po,0},
@@ -19841,10 +19841,10 @@ static const struct unidata st117[] = {
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
@@ -21630,21 +21630,21 @@ static const struct unidata st130[] = {
 };
 static const struct unidata st131[] = {
 {0,0,0,0,0,0,Lo,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,0,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,0,Mn,0},
-{0,0,0,0,0,230,Mn,0},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,0,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
 {0,0,0,0,0,0,Lo,0},
@@ -21685,14 +21685,14 @@ static const struct unidata st131[] = {
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,1,Mn,0},
-{0,0,0,0,0,220,Mn,0},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
-{0,0,0,0,0,9,Mn,0},
+{0,0,0,0,0,9,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,No,0},
 {0,0,0,0,0,0,No,0},
 {0,0,0,0,0,0,No,0},
@@ -22250,20 +22250,20 @@ static const struct unidata st135[] = {
 {cd3086,cd3086,0,0,0,0,So,0},
 {cd3087,cd3087,0,0,0,0,So,0},
 {cd3088,cd3088,0,0,0,0,So,0},
+{0,0,0,0,0,216,Mc,unicode_grapheme_break_extend},
 {0,0,0,0,0,216,Mc,0},
-{0,0,0,0,0,216,Mc,0},
-{0,0,0,0,0,1,Mn,0},
-{0,0,0,0,0,1,Mn,0},
-{0,0,0,0,0,1,Mn,0},
+{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,1,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,226,Mc,0},
-{0,0,0,0,0,216,Mc,0},
-{0,0,0,0,0,216,Mc,0},
-{0,0,0,0,0,216,Mc,0},
-{0,0,0,0,0,216,Mc,0},
-{0,0,0,0,0,216,Mc,0},
+{0,0,0,0,0,216,Mc,unicode_grapheme_break_extend},
+{0,0,0,0,0,216,Mc,unicode_grapheme_break_extend},
+{0,0,0,0,0,216,Mc,unicode_grapheme_break_extend},
+{0,0,0,0,0,216,Mc,unicode_grapheme_break_extend},
+{0,0,0,0,0,216,Mc,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,Cf,0},
 {0,0,0,0,0,0,Cf,0},
 {0,0,0,0,0,0,Cf,0},
@@ -22272,25 +22272,25 @@ static const struct unidata st135[] = {
 {0,0,0,0,0,0,Cf,0},
 {0,0,0,0,0,0,Cf,0},
 {0,0,0,0,0,0,Cf,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0}
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend}
 };
 static const struct unidata st136[] = {
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,So,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,220,Mn,0},
-{0,0,0,0,0,220,Mn,0},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,220,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,So,0},
@@ -22321,10 +22321,10 @@ static const struct unidata st136[] = {
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,So,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,So,0},
@@ -22475,9 +22475,9 @@ static const struct unidata st137[] = {
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,So,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
-{0,0,0,0,0,230,Mn,0},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
+{0,0,0,0,0,230,Mn,unicode_grapheme_break_extend},
 {0,0,0,0,0,0,So,0},
 {0,0,0,0,0,0,Cn,0},
 {0,0,0,0,0,0,Cn,0},
index 859575479c6401124aba03226f1ce731fafe14ce..16ff5faaa86241c1846808ae897c5329a8a8412f 100644 (file)
@@ -34,7 +34,8 @@ enum unicode_gc_cat {
   unicode_gc_Zs
 };
 enum unicode_flags {
-  unicode_normalize_before_casefold = 1
+  unicode_normalize_before_casefold = 1,
+  unicode_grapheme_break_extend = 2
 };
 
 struct unidata {
index 7d0d779630269b435259e9cf2fb63342082986cd..01c9db2c9e719b67d2fbef41cae620937aa7030d 100644 (file)
@@ -41,7 +41,6 @@ const char *casefold(const char *ptr) {
 
 static enum unicode_gc_cat cat(uint32_t c) {
   if(c < UNICODE_NCHARS) {
-    /* If this a known character, convert it to lower case */
     const struct unidata *const ud = &unidata[c / UNICODE_MODULUS][c % UNICODE_MODULUS];
     return ud->gc;
   } else
index 8f58c0838fc23184f8133857966a35203d1025c5..f04dc308fe5d16254a2d6f46d553319ac82f12d3 100755 (executable)
@@ -44,6 +44,7 @@
 #  - ...
 #
 use strict;
+use File::Basename;
 
 sub out {
     print @_ or die "$!\n";
@@ -74,15 +75,17 @@ my $minld = 0;                      # max/min lower case offset
 # Unicode standard version to make sure that a given version of DisOrder
 # supports a given version of Unicode.
 sub need_input {
-    my $f = shift;
-    if(!-e $f) {
-       system("wget http://www.unicode.org/Public/5.0.0/ucd/$f");
-       chmod(0444, $f);
+    my $path = shift;
+    my $lpath = basename($path);
+    if(!-e $lpath) {
+       system("wget http://www.unicode.org/Public/5.0.0/ucd/$path");
+       chmod(0444, $lpath) or die "$lpath: $!\n";
     }
 }
 
 need_input("UnicodeData.txt");
 need_input("CaseFolding.txt");
+need_input("auxiliary/GraphemeBreakProperty.txt");
 
 # Read the main data file
 open(STDIN, "<UnicodeData.txt") or die "UnicodeData.txt: $!\n";
@@ -127,6 +130,29 @@ while(<>) {
     $max = $c if $c > $max;
 }
 
+# Grapheme break data
+# NB we do this BEFORE filling in blanks so that the Hangul characters
+# don't get filled in; we can compute their properties mechanically.
+open(STDIN, "<GraphemeBreakProperty.txt") or die "GraphemeBreakProperty.txt: $!\n";
+while(<>) {
+    chomp;
+    s/\s*\#.*//;
+    next if $_ eq '';
+    my ($range, $propval) = split(/\s*;\s*/, $_);
+    if($range =~ /(.*)\.\.(.*)/) {
+       for my $c (hex($1) .. hex($2)) {
+           if(exists $data{$c}) {
+               $data{$c}->{gbreak} = $propval;
+           }
+       }
+    } else {
+       my $c = hex($range);
+       if(exists $data{$c}) {
+           $data{$c}->{gbreak} = $propval;
+       }
+    }
+}
+
 # Round up the maximum value to a whole number of subtables
 $max += ($modulus - 1) - ($max % $modulus);
 
@@ -193,7 +219,8 @@ out("enum unicode_gc_cat {\n",
         map("  unicode_gc_$_", sort keys %cats)), "\n};\n");
 
 out("enum unicode_flags {\n",
-    "  unicode_normalize_before_casefold = 1\n",
+    "  unicode_normalize_before_casefold = 1,\n",
+    "  unicode_grapheme_break_extend = 2\n",
     "};\n",
     "\n");
 
@@ -317,9 +344,18 @@ for(my $base = 0; $base <= $max; $base += $modulus) {
        my $canonsym = ($data{$c}->{canonsym} or "0");
        my $compatsym = ($data{$c}->{compatsym} or "0");
        my $cfsym = ($data{$c}->{cfsym} or "0");
-       my $flags = ($data{$c}->{ypogegrammeni}
-                    ? "unicode_normalize_before_casefold"
-                    : 0);
+       my @flags = ();
+       if($data{$c}->{ypogegrammeni}) {
+           push(@flags, "unicode_normalize_before_casefold");
+       }
+       # Currently we only store the Extend class, using a bit that would
+       # otherwise be wasted.  The other classes are readily computable.
+       # If there is a convenient way to compute Extend at runtime I have
+       # yet to discover it.
+       if(exists $data{$c}->{gbreak} and $data{$c}->{gbreak} eq 'Extend') {
+           push(@flags, "unicode_grapheme_break_extend");
+       }
+       my $flags = @flags ? join("|", @flags) : 0;
        push(@t, "{".
             join(",",
                  $compatsym,