chiark / gitweb /
start on utf32__unidata() which will provide a uniform interface
author Richard Kettlewell <rjk@greenend.org.uk>
Sun, 18 Nov 2007 14:26:59 +0000 (14:26 +0000)
committer Richard Kettlewell <rjk@greenend.org.uk>
Sun, 18 Nov 2007 14:26:59 +0000 (14:26 +0000)
lib/test.c
lib/unicode.c
lib/unidata.c
scripts/make-unidata

index 69ca609af532590af3e5ab3f331f44f58a2556f8..62494449ccbe5256ace2ee42e0d8f5c2f5d912ca 100644 (file)
@@ -567,11 +567,11 @@ static void test_unicode(void) {
       fprintf(stderr,                                           \
               "NormalizationTest.txt:%d: c%d != "#T"(c%d)\n",   \
               lineno, A, B);                                    \
-      fprintf(stderr, "    c%d:      %s\n",                    \
+      fprintf(stderr, "      c%d: %s\n",                         \
               A, format_utf32(c[A]));                          \
       fprintf(stderr, "%4s(c%d): %s\n",                                \
               #T, B, format_utf32(T##_c[B]));                  \
-      count_error();                                                   \
+      count_error();                                           \
     }                                                          \
   } while(0)
     unt_check(NFD, 3, 1);
index 618ff06b64ec5a15c908c53015503c0cb721a915..5e2dacd591c42520f9a2961662e293f66e7e0ee3 100644 (file)
@@ -204,14 +204,26 @@ size_t utf32_len(const uint32_t *s) {
   return (size_t)(t - s);
 }
 
+/** @brief Return the @ref unidata structure for code point @p c
+ *
+ * @p c can be any 32-bit value; a sensible value will be returned regardless.
+ */
+static const struct unidata *utf32__unidata(uint32_t c) {
+  if(c < UNICODE_NCHARS)
+    return &unidata[c / UNICODE_MODULUS][c % UNICODE_MODULUS];
+  else if((c >= 0xF0000 && c <= 0xFFFFD)
+          || (c >= 0x100000 && c <= 0x10FFFD))
+    return utf32__unidata(0xE000);      /* Co */
+  else
+    return utf32__unidata(0xFFFF);      /* Cn */
+}
+
 /** @brief Return the combining class of @p c
  * @param c Code point
  * @return Combining class of @p c
  */
 static inline int utf32__combining_class(uint32_t c) {
-  if(c < UNICODE_NCHARS)
-    return unidata[c / UNICODE_MODULUS][c % UNICODE_MODULUS].ccc;
-  return 0;
+  return utf32__unidata(c)->ccc;
 }
 
 /** @brief Stably sort [s,s+ns) into descending order of combining class
@@ -320,10 +332,7 @@ static int utf32__canonical_ordering(uint32_t *s, size_t ns) {
 
 /** @brief Guts of the decomposition lookup functions */
 #define utf32__decompose_one_generic(WHICH) do {                        \
-  const uint32_t *dc =                                                  \
-    (c < UNICODE_NCHARS                                                 \
-     ? unidata[c / UNICODE_MODULUS][c % UNICODE_MODULUS].WHICH          \
-     : 0);                                                              \
+  const uint32_t *dc = utf32__unidata(c)->WHICH;                        \
   if(dc) {                                                              \
     /* Found a canonical decomposition in the table */                  \
     while(*dc)                                                          \
@@ -425,10 +434,7 @@ uint32_t *utf32_decompose_compat(const uint32_t *s, size_t ns, size_t *ndp) {
 
 /** @brief Single-character case-fold and decompose operation */
 #define utf32__casefold_one(WHICH) do {                                 \
-  const uint32_t *cf =                                                  \
-     (c < UNICODE_NCHARS                                                \
-      ? unidata[c / UNICODE_MODULUS][c % UNICODE_MODULUS].casefold      \
-      : 0);                                                             \
+  const uint32_t *cf = utf32__unidata(c)->casefold;                     \
   if(cf) {                                                              \
     /* Found a case-fold mapping in the table */                        \
     while(*cf)                                                          \
@@ -461,13 +467,9 @@ uint32_t *utf32_casefold_canon(const uint32_t *s, size_t ns, size_t *ndp) {
    * normalize before we fold.  In Unicode 5.0.0 this means 0345 COMBINING
    * GREEK YPOGEGRAMMENI in its decomposition and the various characters that
    * canonically decompose to it. */
-  for(n = 0; n < ns; ++n) {
-    c = s[n];
-    if(c < UNICODE_NCHARS
-       && (unidata[c / UNICODE_MODULUS][c % UNICODE_MODULUS].flags
-           & unicode_normalize_before_casefold))
+  for(n = 0; n < ns; ++n)
+    if(utf32__unidata(s[n])->flags & unicode_normalize_before_casefold)
       break;
-  }
   if(n < ns) {
     /* We need a preliminary decomposition */
     if(!(ss = utf32_decompose_canon(s, ns, &ns)))
@@ -513,13 +515,9 @@ uint32_t *utf32_casefold_compat(const uint32_t *s, size_t ns, size_t *ndp) {
   size_t n;
   uint32_t *ss = 0;
 
-  for(n = 0; n < ns; ++n) {
-    c = s[n];
-    if(c < UNICODE_NCHARS
-       && (unidata[c / UNICODE_MODULUS][c % UNICODE_MODULUS].flags
-           & unicode_normalize_before_casefold))
+  for(n = 0; n < ns; ++n)
+    if(utf32__unidata(s[n])->flags & unicode_normalize_before_casefold)
       break;
-  }
   if(n < ns) {
     /* We need a preliminary _canonical_ decomposition */
     if(!(ss = utf32_decompose_canon(s, ns, &ns)))
@@ -578,11 +576,7 @@ int utf32_cmp(const uint32_t *a, const uint32_t *b) {
  * @return General_Category property value
  */
 static inline enum unicode_General_Category utf32__general_category(uint32_t c) {
-  if(c < UNICODE_NCHARS) {
-    const struct unidata *const ud = &unidata[c / UNICODE_MODULUS][c % UNICODE_MODULUS];
-    return ud->general_category;
-  } else
-    return unicode_General_Category_Cn;
+  return utf32__unidata(c)->general_category;
 }
 
 /** @brief Check Grapheme_Cluster_Break property
@@ -642,12 +636,9 @@ static uint32_t utf32__hangul_syllable_type(uint32_t c) {
  * @return Word_Break property value of @p c
  */
 static enum unicode_Word_Break utf32__word_break(uint32_t c) {
-  if(c < 0xAC00 || c > 0xD7A3) {
-    if(c < UNICODE_NCHARS)
-      return unidata[c / UNICODE_MODULUS][c % UNICODE_MODULUS].word_break;
-    else
-      return unicode_Word_Break_Other;
-  } else
+  if(c < 0xAC00 || c > 0xD7A3)
+    return utf32__unidata(c)->word_break;
+  else
     return unicode_Word_Break_ALetter;
 }
 
index 1d952247ef142bc2dacbdb8043659197728277c2..b5c2f5fa3386b5a336fa9e3cc6014f7e87bd49c1 100644 (file)
@@ -17533,6 +17533,266 @@ static const struct unidata st98[] = {
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}
 };
 static const struct unidata st99[] = {
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}
+};
+static const struct unidata st100[] = {
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther},
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}
+};
+static const struct unidata st101[] = {
 {cd2130,cd2130,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
 {cd2131,cd2131,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
 {cd1439,cd1439,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
@@ -17662,7 +17922,7 @@ static const struct unidata st99[] = {
 {cd2248,cd2248,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
 {cd2249,cd2249,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}
 };
-static const struct unidata st100[] = {
+static const struct unidata st102[] = {
 {cd2250,cd2250,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
 {cd1318,cd1318,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
 {cd2251,cd2251,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
@@ -17792,7 +18052,7 @@ static const struct unidata st100[] = {
 {cd2366,cd2366,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
 {cd2367,cd2367,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}
 };
-static const struct unidata st101[] = {
+static const struct unidata st103[] = {
 {cd2368,cd2368,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
 {cd2369,cd2369,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
 {cd2370,cd2370,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
@@ -17922,7 +18182,7 @@ static const struct unidata st101[] = {
 {cd2464,cd2464,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
 {cd2465,cd2465,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}
 };
-static const struct unidata st102[] = {
+static const struct unidata st104[] = {
 {cd2466,cd2466,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
 {cd2467,cd2467,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
 {cd2468,cd2468,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
@@ -18052,7 +18312,7 @@ static const struct unidata st102[] = {
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}
 };
-static const struct unidata st103[] = {
+static const struct unidata st105[] = {
 {cd2531,0,cf882,0,0,0,Ll,0,GBOther,WBALetter,SBLower},
 {cd2532,0,cf883,0,0,0,Ll,0,GBOther,WBALetter,SBLower},
 {cd2533,0,cf884,0,0,0,Ll,0,GBOther,WBALetter,SBLower},
@@ -18182,7 +18442,7 @@ static const struct unidata st103[] = {
 {cd2597,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {cd2597,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}
 };
-static const struct unidata st104[] = {
+static const struct unidata st106[] = {
 {cd2597,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {cd2597,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {cd2598,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
@@ -18312,7 +18572,7 @@ static const struct unidata st104[] = {
 {cd2633,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {cd2633,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}
 };
-static const struct unidata st105[] = {
+static const struct unidata st107[] = {
 {cd2634,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {cd2635,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {cd2636,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
@@ -18442,7 +18702,7 @@ static const struct unidata st105[] = {
 {cd2686,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {cd2687,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}
 };
-static const struct unidata st106[] = {
+static const struct unidata st108[] = {
 {cd2688,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {cd2692,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {cd2693,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
@@ -18572,7 +18832,7 @@ static const struct unidata st106[] = {
 {cd2777,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {cd2778,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}
 };
-static const struct unidata st107[] = {
+static const struct unidata st109[] = {
 {cd2779,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {cd2780,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {cd2781,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
@@ -18702,7 +18962,7 @@ static const struct unidata st107[] = {
 {cd2830,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {cd2831,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}
 };
-static const struct unidata st108[] = {
+static const struct unidata st110[] = {
 {cd2832,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {cd2833,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {cd2834,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
@@ -18832,7 +19092,7 @@ static const struct unidata st108[] = {
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}
 };
-static const struct unidata st109[] = {
+static const struct unidata st111[] = {
 {0,0,0,0,0,0,Mn,0,GBExtend,WBExtend,SBExtend},
 {0,0,0,0,0,0,Mn,0,GBExtend,WBExtend,SBExtend},
 {0,0,0,0,0,0,Mn,0,GBExtend,WBExtend,SBExtend},
@@ -18962,7 +19222,7 @@ static const struct unidata st109[] = {
 {cd2953,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {cd2954,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}
 };
-static const struct unidata st110[] = {
+static const struct unidata st112[] = {
 {cd2955,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {cd2956,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {cd2956,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
@@ -19092,7 +19352,7 @@ static const struct unidata st110[] = {
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,Cf,0,GBControl,WBFormat,SBFormat}
 };
-static const struct unidata st111[] = {
+static const struct unidata st113[] = {
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther},
 {cd2907,0,0,0,0,0,Po,0,GBOther,WBOther,SBSTerm},
 {cd2994,0,0,0,0,0,Po,0,GBOther,WBOther,SBOther},
@@ -19222,7 +19482,7 @@ static const struct unidata st111[] = {
 {cd1840,0,0,0,0,0,Lo,0,GBOther,WBKatakana,SBOLetter},
 {cd1841,0,0,0,0,0,Lo,0,GBOther,WBKatakana,SBOLetter}
 };
-static const struct unidata st112[] = {
+static const struct unidata st114[] = {
 {cd1842,0,0,0,0,0,Lo,0,GBOther,WBKatakana,SBOLetter},
 {cd1843,0,0,0,0,0,Lo,0,GBOther,WBKatakana,SBOLetter},
 {cd1844,0,0,0,0,0,Lo,0,GBOther,WBKatakana,SBOLetter},
@@ -19352,7 +19612,7 @@ static const struct unidata st112[] = {
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}
 };
-static const struct unidata st113[] = {
+static const struct unidata st115[] = {
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
@@ -19482,7 +19742,7 @@ static const struct unidata st113[] = {
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}
 };
-static const struct unidata st114[] = {
+static const struct unidata st116[] = {
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
@@ -19612,7 +19872,7 @@ static const struct unidata st114[] = {
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}
 };
-static const struct unidata st115[] = {
+static const struct unidata st117[] = {
 {0,0,0,0,0,0,Po,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,Po,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther},
@@ -19742,7 +20002,7 @@ static const struct unidata st115[] = {
 {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther}
 };
-static const struct unidata st116[] = {
+static const struct unidata st118[] = {
 {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther},
@@ -19872,7 +20132,7 @@ static const struct unidata st116[] = {
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}
 };
-static const struct unidata st117[] = {
+static const struct unidata st119[] = {
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
@@ -20002,7 +20262,7 @@ static const struct unidata st117[] = {
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}
 };
-static const struct unidata st118[] = {
+static const struct unidata st120[] = {
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
@@ -20132,7 +20392,7 @@ static const struct unidata st118[] = {
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}
 };
-static const struct unidata st119[] = {
+static const struct unidata st121[] = {
 {0,0,cf919,0,40,0,Lu,0,GBOther,WBALetter,SBUpper},
 {0,0,cf920,0,40,0,Lu,0,GBOther,WBALetter,SBUpper},
 {0,0,cf921,0,40,0,Lu,0,GBOther,WBALetter,SBUpper},
@@ -20262,7 +20522,7 @@ static const struct unidata st119[] = {
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}
 };
-static const struct unidata st120[] = {
+static const struct unidata st122[] = {
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
@@ -20392,7 +20652,7 @@ static const struct unidata st120[] = {
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}
 };
-static const struct unidata st121[] = {
+static const struct unidata st123[] = {
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
@@ -20522,7 +20782,7 @@ static const struct unidata st121[] = {
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}
 };
-static const struct unidata st122[] = {
+static const struct unidata st124[] = {
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
@@ -20652,7 +20912,7 @@ static const struct unidata st122[] = {
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}
 };
-static const struct unidata st123[] = {
+static const struct unidata st125[] = {
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {0,0,0,0,0,0,Mn,0,GBExtend,WBExtend,SBExtend},
 {0,0,0,0,0,0,Mn,0,GBExtend,WBExtend,SBExtend},
@@ -20782,7 +21042,7 @@ static const struct unidata st123[] = {
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}
 };
-static const struct unidata st124[] = {
+static const struct unidata st126[] = {
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
 {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter},
@@ -20912,7 +21172,7 @@ static const struct unidata st124[] = {
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}
 };
-static const struct unidata st125[] = {
+static const struct unidata st127[] = {
 {0,0,0,0,0,0,Nl,0,GBOther,WBALetter,SBOLetter},
 {0,0,0,0,0,0,Nl,0,GBOther,WBALetter,SBOLetter},
 {0,0,0,0,0,0,Nl,0,GBOther,WBALetter,SBOLetter},
@@ -21042,7 +21302,7 @@ static const struct unidata st125[] = {
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}
 };
-static const struct unidata st126[] = {
+static const struct unidata st128[] = {
 {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther},
@@ -21172,7 +21432,7 @@ static const struct unidata st126[] = {
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}
 };
-static const struct unidata st127[] = {
+static const struct unidata st129[] = {
 {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther},
@@ -21302,7 +21562,7 @@ static const struct unidata st127[] = {
 {0,0,0,0,0,220,Mn,0,GBExtend,WBExtend,SBExtend},
 {0,0,0,0,0,220,Mn,0,GBExtend,WBExtend,SBExtend}
 };
-static const struct unidata st128[] = {
+static const struct unidata st130[] = {
 {0,0,0,0,0,220,Mn,0,GBExtend,WBExtend,SBExtend},
 {0,0,0,0,0,220,Mn,0,GBExtend,WBExtend,SBExtend},
 {0,0,0,0,0,220,Mn,0,GBExtend,WBExtend,SBExtend},
@@ -21432,7 +21692,7 @@ static const struct unidata st128[] = {
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}
 };
-static const struct unidata st129[] = {
+static const struct unidata st131[] = {
 {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther},
@@ -21562,7 +21822,7 @@ static const struct unidata st129[] = {
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}
 };
-static const struct unidata st130[] = {
+static const struct unidata st132[] = {
 {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther},
@@ -21692,7 +21952,7 @@ static const struct unidata st130[] = {
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}
 };
-static const struct unidata st131[] = {
+static const struct unidata st133[] = {
 {cd491,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper},
 {cd493,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper},
 {cd1086,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper},
@@ -21822,7 +22082,7 @@ static const struct unidata st131[] = {
 {cd511,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper},
 {cd1131,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper}
 };
-static const struct unidata st132[] = {
+static const struct unidata st134[] = {
 {cd1271,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper},
 {cd1098,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper},
 {cd2,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower},
@@ -21952,7 +22212,7 @@ static const struct unidata st132[] = {
 {cd530,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower},
 {cd533,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower}
 };
-static const struct unidata st133[] = {
+static const struct unidata st135[] = {
 {cd291,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower},
 {cd301,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower},
 {cd292,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower},
@@ -22082,7 +22342,7 @@ static const struct unidata st133[] = {
 {cd1270,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper},
 {cd509,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper}
 };
-static const struct unidata st134[] = {
+static const struct unidata st136[] = {
 {cd510,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper},
 {cd1126,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper},
 {cd511,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper},
@@ -22212,7 +22472,7 @@ static const struct unidata st134[] = {
 {cd1272,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower},
 {cd287,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower}
 };
-static const struct unidata st135[] = {
+static const struct unidata st137[] = {
 {cd180,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower},
 {cd529,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower},
 {cd530,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower},
@@ -22342,7 +22602,7 @@ static const struct unidata st135[] = {
 {cd505,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper},
 {cd507,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper}
 };
-static const struct unidata st136[] = {
+static const struct unidata st138[] = {
 {cd1094,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper},
 {cd508,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper},
 {cd1270,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper},
@@ -22472,7 +22732,7 @@ static const struct unidata st136[] = {
 {cd535,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower},
 {cd536,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower}
 };
-static const struct unidata st137[] = {
+static const struct unidata st139[] = {
 {cd343,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower},
 {cd3118,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower},
 {cd3119,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower},
@@ -22602,7 +22862,7 @@ static const struct unidata st137[] = {
 {cd3123,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower},
 {cd338,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower}
 };
-static const struct unidata st138[] = {
+static const struct unidata st140[] = {
 {cd340,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower},
 {cd341,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower},
 {cd3124,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower},
@@ -22732,7 +22992,7 @@ static const struct unidata st138[] = {
 {cd1075,0,0,0,0,0,Nd,0,GBOther,WBNumeric,SBNumeric},
 {cd1076,0,0,0,0,0,Nd,0,GBOther,WBNumeric,SBNumeric}
 };
-static const struct unidata st139[] = {
+static const struct unidata st141[] = {
 {cd3138,cd3138,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
 {cd3139,cd3139,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
 {cd3140,cd3140,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
@@ -22862,7 +23122,7 @@ static const struct unidata st139[] = {
 {cd3243,cd3243,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
 {cd3244,cd3244,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}
 };
-static const struct unidata st140[] = {
+static const struct unidata st142[] = {
 {cd3245,cd3245,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
 {cd3246,cd3246,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
 {cd3247,cd3247,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
@@ -22992,7 +23252,7 @@ static const struct unidata st140[] = {
 {cd3354,cd3354,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
 {cd3355,cd3355,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}
 };
-static const struct unidata st141[] = {
+static const struct unidata st143[] = {
 {cd3356,cd3356,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
 {cd2419,cd2419,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
 {cd2318,cd2318,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
@@ -23122,7 +23382,7 @@ static const struct unidata st141[] = {
 {cd3463,cd3463,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
 {cd3464,cd3464,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}
 };
-static const struct unidata st142[] = {
+static const struct unidata st144[] = {
 {cd3465,cd3465,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
 {cd3466,cd3466,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
 {cd3467,cd3467,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
@@ -23252,7 +23512,7 @@ static const struct unidata st142[] = {
 {cd2518,cd2518,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
 {cd2518,cd2518,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}
 };
-static const struct unidata st143[] = {
+static const struct unidata st145[] = {
 {cd3581,cd3581,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
 {cd3582,cd3582,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
 {cd3583,cd3583,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter},
@@ -23382,7 +23642,7 @@ static const struct unidata st143[] = {
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther},
 {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}
 };
-const struct unidata*const unidata[]={
+const struct unidata *const unidata[]={
 st0,
 st1,
 st2,
@@ -23815,73 +24075,71 @@ st16,
 st16,
 st16,
 st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
-st16,
 st99,
+st99,
+st99,
+st99,
+st99,
+st99,
+st99,
+st99,
+st99,
+st99,
+st99,
+st99,
+st99,
+st99,
+st99,
+st99,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
+st100,
 st100,
 st101,
 st102,
@@ -23899,23 +24157,25 @@ st113,
 st114,
 st115,
 st116,
-st16,
-st16,
 st117,
 st118,
+st16,
+st16,
 st119,
 st120,
+st121,
+st122,
 st16,
 st16,
 st16,
 st16,
 st16,
 st16,
-st121,
+st123,
 st16,
-st122,
+st124,
 st16,
-st123,
+st125,
 st16,
 st16,
 st16,
@@ -23965,9 +24225,9 @@ st40,
 st40,
 st40,
 st40,
-st124,
+st126,
 st16,
-st125,
+st127,
 st16,
 st16,
 st16,
@@ -24312,21 +24572,21 @@ st16,
 st16,
 st16,
 st67,
-st126,
-st127,
 st128,
 st129,
-st16,
 st130,
-st16,
 st131,
+st16,
 st132,
+st16,
 st133,
 st134,
 st135,
 st136,
 st137,
 st138,
+st139,
+st140,
 st16,
 st16,
 st16,
@@ -24903,9 +25163,9 @@ st16,
 st16,
 st16,
 st16,
-st139,
-st140,
 st141,
 st142,
 st143,
+st144,
+st145,
 };
index 46f972430a33cfebc4372eb0a8690569a7039f41..b78d289fb593f0143a9e7550aa61c19e3ee964fc 100755 (executable)
@@ -82,6 +82,7 @@ sub input {
        chmod(0444, $lpath) or die "$lpath: $!\n";
     }
     open(STDIN, "<$lpath") or die "$lpath: $!\n";
+    print STDERR "Reading $lpath...\n";
 }
 
 
@@ -94,8 +95,12 @@ while(<>) {
     # TODO justify this exclusion!
     my $name = $f[1];
     my $gc = $f[2];            # General_Category
+    # Various GCs we don't expect to see in UnicodeData.txt
     $cats{$gc} = 1;            # always record all GCs
     next if $name =~ /(first|last)>/i; # ignore placeholders
+    die "unexpected Cn" if $gc eq 'Cn'; # unassigned: filled in later, never listed
+    die "unexpected Co" if $gc eq 'Co'; # private use: filled in later by range
+    die "unexpected Cs" if $gc eq 'Cs'; # surrogates: filled in later by range
     my $ccc = $f[3];           # Canonical_Combining_Class
     my $dm = $f[5];            # Decomposition_Type + Decomposition_Mapping
     my $sum = hex($f[12]) || $c; # Simple_Uppercase_Mapping
@@ -193,18 +198,54 @@ for my $c (keys %data) {
 # Round up the maximum value to a whole number of subtables
 $max += ($modulus - 1) - ($max % $modulus);
 
-# Make sure there are no gaps
+# Surrogates
+my $Cs = {
+    "gc" => "Cs",              # UTF-16 surrogate
+    "ccc" => 0,
+    "ud" => 0,
+    "ld" => 0
+};
+for(my $c = 0xD800; $c <= 0xDFFF; ++$c) {
+    $data{$c} = $Cs;
+}
+
+# Private use characters
+# We only fill in values up to $max; utf32__unidata() maps higher private-use code points onto U+E000
+my $Co = {
+    "gc" => "Co",
+    "ccc" => 0,
+    "ud" => 0,
+    "ld" => 0
+};
+for(my $c = 0xE000; $c <= 0xF8FF && $c <= $max; ++$c) {
+    $data{$c} = $Co;
+}
+for(my $c = 0xF0000; $c <= 0xFFFFD && $c <= $max; ++$c) {
+    $data{$c} = $Co;
+}
+for(my $c = 0x100000; $c <= 0x10FFFD && $c <= $max; ++$c) {
+    $data{$c} = $Co;
+}
+
+# Anything left is not assigned
+my $Cn = {
+    "gc" => "Cn",              # not assigned
+    "ccc" => 0,
+    "ud" => 0,
+    "ld" => 0
+};
 for(my $c = 0; $c <= $max; ++$c) {
     if(!exists $data{$c}) {
-       $data{$c} = {
-           "gc" => "Cn",       # not assigned
-           "ccc" => 0,
-           "ud" => 0,
-           "ld" => 0,
-           "wbreak" => 'Other',
-           "gbreak" => 'Other',
-           "sbreak" => 'Other',
-           };
+       $data{$c} = $Cn;
+    }
+    if(!exists $data{$c}->{wbreak}) {
+       $data{$c}->{wbreak} = 'Other';
+    }
+    if(!exists $data{$c}->{gbreak}) {
+       $data{$c}->{gbreak} = 'Other';
+    }
+    if(!exists $data{$c}->{sbreak}) {
+       $data{$c}->{sbreak} = 'Other';
     }
 }
 $cats{'Cn'} = 1;
@@ -247,6 +288,7 @@ while(<>) {
 }
 
 # Generate the header file
+print STDERR "Generating unidata.h...\n";
 open(STDOUT, ">unidata.h") or die "unidata.h: $!\n";
 
 out("/* Automatically generated file, see scripts/make-unidata */\n",
@@ -324,6 +366,7 @@ out("#endif\n");
 
 close STDOUT or die "unidata.h: $!\n";
 
+print STDERR "Generating unidata.c...\n";
 open(STDOUT, ">unidata.c") or die "unidata.c: $!\n";
 
 out("/* Automatically generated file, see scripts/make-unidata */\n",
@@ -367,7 +410,7 @@ for(my $c = 0; $c <= $max; ++$c) {
     # If canon is set then compat will be too and will be identical.
     # If compat is set the canon might be clear.  So we use the
     # compat version and fix up the symbols after.
-    if(exists $data{$c}->{compat}) {
+    if(exists $data{$c} && exists $data{$c}->{compat}) {
        my $s = join(",",
                     (map(hex($_), split(/\s+/, $data{$c}->{compat})), 0));
        if(!exists $decompnums{$s}) {
@@ -393,7 +436,7 @@ my %cfnums = ();
 my $cfsaved = 0;
 out("static const uint32_t ");
 for(my $c = 0; $c <= $max; ++$c) {
-    if(exists $data{$c}->{casefold}) {
+    if(exists $data{$c} && exists $data{$c}->{casefold}) {
        my $s = join(",",
                     (map(hex($_), split(/\s+/, $data{$c}->{casefold})), 0));
        if(!exists $cfnums{$s}) {
@@ -457,7 +500,7 @@ for(my $base = 0; $base <= $max; $base += $modulus) {
     $subtableno{$base} = $subtable{$t};
 }
 
-out("const struct unidata*const unidata[]={\n");
+out("const struct unidata *const unidata[]={\n");
 for(my $base = 0; $base <= $max; $base += $modulus) {
     #out("st$subtableno{$base} /* ".sprintf("%04x", $base)." */,\n");
     out("st$subtableno{$base},\n");
@@ -466,5 +509,6 @@ out("};\n");
 
 close STDOUT or die "unidata.c: $!\n";
 
-print STDERR "max=$max, subtables=$subtablecounter, subtablessaved=$subtablessaved\n";
+printf STDERR "max=%04X\n", $max;
+print STDERR "subtables=$subtablecounter, subtablessaved=$subtablessaved\n";
 print STDERR "decompsaved=$decompsaved cfsaved=$cfsaved\n";