From bcf9ed7f5b44c177d927d147f87c5c08e377adfa Mon Sep 17 00:00:00 2001 Message-Id: From: Mark Wooding Date: Sun, 18 Nov 2007 14:26:59 +0000 Subject: [PATCH 1/1] start on ut32__unidata() which will provide a uniform interface Organization: Straylight/Edgeware From: Richard Kettlewell --- lib/test.c | 4 +- lib/unicode.c | 59 +++-- lib/unidata.c | 508 ++++++++++++++++++++++++++++++++----------- scripts/make-unidata | 72 ++++-- 4 files changed, 469 insertions(+), 174 deletions(-) diff --git a/lib/test.c b/lib/test.c index 69ca609..6249444 100644 --- a/lib/test.c +++ b/lib/test.c @@ -567,11 +567,11 @@ static void test_unicode(void) { fprintf(stderr, \ "NormalizationTest.txt:%d: c%d != "#T"(c%d)\n", \ lineno, A, B); \ - fprintf(stderr, " c%d: %s\n", \ + fprintf(stderr, " c%d: %s\n", \ A, format_utf32(c[A])); \ fprintf(stderr, "%4s(c%d): %s\n", \ #T, B, format_utf32(T##_c[B])); \ - count_error(); \ + count_error(); \ } \ } while(0) unt_check(NFD, 3, 1); diff --git a/lib/unicode.c b/lib/unicode.c index 618ff06..5e2dacd 100644 --- a/lib/unicode.c +++ b/lib/unicode.c @@ -204,14 +204,26 @@ size_t utf32_len(const uint32_t *s) { return (size_t)(t - s); } +/** @brief Return the @ref unidata structure for code point @p c + * + * @p c can be any 32-bit value, a sensible value will be returned regardless. 
+ */ +static const struct unidata *utf32__unidata(uint32_t c) { + if(c < UNICODE_NCHARS) + return &unidata[c / UNICODE_MODULUS][c % UNICODE_MODULUS]; + else if((c >= 0xF0000 && c <= 0xFFFFD) + || (c >= 0x100000 && c <= 0x10FFFD)) + return utf32__unidata(0xE000); /* Co */ + else + return utf32__unidata(0xFFFF); /* Cn */ +} + /** @brief Return the combining class of @p c * @param c Code point * @return Combining class of @p c */ static inline int utf32__combining_class(uint32_t c) { - if(c < UNICODE_NCHARS) - return unidata[c / UNICODE_MODULUS][c % UNICODE_MODULUS].ccc; - return 0; + return utf32__unidata(c)->ccc; } /** @brief Stably sort [s,s+ns) into descending order of combining class @@ -320,10 +332,7 @@ static int utf32__canonical_ordering(uint32_t *s, size_t ns) { /** @brief Guts of the decomposition lookup functions */ #define utf32__decompose_one_generic(WHICH) do { \ - const uint32_t *dc = \ - (c < UNICODE_NCHARS \ - ? unidata[c / UNICODE_MODULUS][c % UNICODE_MODULUS].WHICH \ - : 0); \ + const uint32_t *dc = utf32__unidata(c)->WHICH; \ if(dc) { \ /* Found a canonical decomposition in the table */ \ while(*dc) \ @@ -425,10 +434,7 @@ uint32_t *utf32_decompose_compat(const uint32_t *s, size_t ns, size_t *ndp) { /** @brief Single-character case-fold and decompose operation */ #define utf32__casefold_one(WHICH) do { \ - const uint32_t *cf = \ - (c < UNICODE_NCHARS \ - ? unidata[c / UNICODE_MODULUS][c % UNICODE_MODULUS].casefold \ - : 0); \ + const uint32_t *cf = utf32__unidata(c)->casefold; \ if(cf) { \ /* Found a case-fold mapping in the table */ \ while(*cf) \ @@ -461,13 +467,9 @@ uint32_t *utf32_casefold_canon(const uint32_t *s, size_t ns, size_t *ndp) { * normalize before we fold. In Unicode 5.0.0 this means 0345 COMBINING * GREEK YPOGEGRAMMENI in its decomposition and the various characters that * canonically decompose to it. 
*/ - for(n = 0; n < ns; ++n) { - c = s[n]; - if(c < UNICODE_NCHARS - && (unidata[c / UNICODE_MODULUS][c % UNICODE_MODULUS].flags - & unicode_normalize_before_casefold)) + for(n = 0; n < ns; ++n) + if(utf32__unidata(s[n])->flags & unicode_normalize_before_casefold) break; - } if(n < ns) { /* We need a preliminary decomposition */ if(!(ss = utf32_decompose_canon(s, ns, &ns))) @@ -513,13 +515,9 @@ uint32_t *utf32_casefold_compat(const uint32_t *s, size_t ns, size_t *ndp) { size_t n; uint32_t *ss = 0; - for(n = 0; n < ns; ++n) { - c = s[n]; - if(c < UNICODE_NCHARS - && (unidata[c / UNICODE_MODULUS][c % UNICODE_MODULUS].flags - & unicode_normalize_before_casefold)) + for(n = 0; n < ns; ++n) + if(utf32__unidata(s[n])->flags & unicode_normalize_before_casefold) break; - } if(n < ns) { /* We need a preliminary _canonical_ decomposition */ if(!(ss = utf32_decompose_canon(s, ns, &ns))) @@ -578,11 +576,7 @@ int utf32_cmp(const uint32_t *a, const uint32_t *b) { * @return General_Category property value */ static inline enum unicode_General_Category utf32__general_category(uint32_t c) { - if(c < UNICODE_NCHARS) { - const struct unidata *const ud = &unidata[c / UNICODE_MODULUS][c % UNICODE_MODULUS]; - return ud->general_category; - } else - return unicode_General_Category_Cn; + return utf32__unidata(c)->general_category; } /** @brief Check Grapheme_Cluster_Break property @@ -642,12 +636,9 @@ static uint32_t utf32__hangul_syllable_type(uint32_t c) { * @return Word_Break property value of @p c */ static enum unicode_Word_Break utf32__word_break(uint32_t c) { - if(c < 0xAC00 || c > 0xD7A3) { - if(c < UNICODE_NCHARS) - return unidata[c / UNICODE_MODULUS][c % UNICODE_MODULUS].word_break; - else - return unicode_Word_Break_Other; - } else + if(c < 0xAC00 || c > 0xD7A3) + return utf32__unidata(c)->word_break; + else return unicode_Word_Break_ALetter; } diff --git a/lib/unidata.c b/lib/unidata.c index 1d95224..b5c2f5f 100644 --- a/lib/unidata.c +++ b/lib/unidata.c @@ -17533,6 +17533,266 
@@ static const struct unidata st98[] = { {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther} }; static const struct unidata st99[] = { +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, 
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, 
+{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Cs,0,GBOther,WBOther,SBOther} +}; +static const struct unidata st100[] = { 
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, 
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, 
+{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther}, +{0,0,0,0,0,0,Co,0,GBOther,WBOther,SBOther} +}; +static const struct unidata st101[] = { {cd2130,cd2130,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, {cd2131,cd2131,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, 
{cd1439,cd1439,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, @@ -17662,7 +17922,7 @@ static const struct unidata st99[] = { {cd2248,cd2248,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, {cd2249,cd2249,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter} }; -static const struct unidata st100[] = { +static const struct unidata st102[] = { {cd2250,cd2250,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, {cd1318,cd1318,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, {cd2251,cd2251,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, @@ -17792,7 +18052,7 @@ static const struct unidata st100[] = { {cd2366,cd2366,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, {cd2367,cd2367,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter} }; -static const struct unidata st101[] = { +static const struct unidata st103[] = { {cd2368,cd2368,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, {cd2369,cd2369,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, {cd2370,cd2370,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, @@ -17922,7 +18182,7 @@ static const struct unidata st101[] = { {cd2464,cd2464,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, {cd2465,cd2465,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter} }; -static const struct unidata st102[] = { +static const struct unidata st104[] = { {cd2466,cd2466,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, {cd2467,cd2467,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, {cd2468,cd2468,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, @@ -18052,7 +18312,7 @@ static const struct unidata st102[] = { {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther} }; -static const struct unidata st103[] = { +static const struct unidata st105[] = { {cd2531,0,cf882,0,0,0,Ll,0,GBOther,WBALetter,SBLower}, {cd2532,0,cf883,0,0,0,Ll,0,GBOther,WBALetter,SBLower}, {cd2533,0,cf884,0,0,0,Ll,0,GBOther,WBALetter,SBLower}, @@ -18182,7 +18442,7 @@ static const struct unidata st103[] = { {cd2597,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {cd2597,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter} }; -static const struct unidata st104[] = { +static const struct unidata st106[] = { 
{cd2597,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {cd2597,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {cd2598,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, @@ -18312,7 +18572,7 @@ static const struct unidata st104[] = { {cd2633,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {cd2633,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter} }; -static const struct unidata st105[] = { +static const struct unidata st107[] = { {cd2634,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {cd2635,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {cd2636,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, @@ -18442,7 +18702,7 @@ static const struct unidata st105[] = { {cd2686,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {cd2687,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter} }; -static const struct unidata st106[] = { +static const struct unidata st108[] = { {cd2688,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {cd2692,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {cd2693,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, @@ -18572,7 +18832,7 @@ static const struct unidata st106[] = { {cd2777,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {cd2778,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter} }; -static const struct unidata st107[] = { +static const struct unidata st109[] = { {cd2779,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {cd2780,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {cd2781,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, @@ -18702,7 +18962,7 @@ static const struct unidata st107[] = { {cd2830,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {cd2831,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter} }; -static const struct unidata st108[] = { +static const struct unidata st110[] = { {cd2832,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {cd2833,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {cd2834,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, @@ -18832,7 +19092,7 @@ static const struct unidata st108[] = { {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther} }; -static const struct unidata 
st109[] = { +static const struct unidata st111[] = { {0,0,0,0,0,0,Mn,0,GBExtend,WBExtend,SBExtend}, {0,0,0,0,0,0,Mn,0,GBExtend,WBExtend,SBExtend}, {0,0,0,0,0,0,Mn,0,GBExtend,WBExtend,SBExtend}, @@ -18962,7 +19222,7 @@ static const struct unidata st109[] = { {cd2953,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {cd2954,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter} }; -static const struct unidata st110[] = { +static const struct unidata st112[] = { {cd2955,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {cd2956,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {cd2956,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, @@ -19092,7 +19352,7 @@ static const struct unidata st110[] = { {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,Cf,0,GBControl,WBFormat,SBFormat} }; -static const struct unidata st111[] = { +static const struct unidata st113[] = { {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}, {cd2907,0,0,0,0,0,Po,0,GBOther,WBOther,SBSTerm}, {cd2994,0,0,0,0,0,Po,0,GBOther,WBOther,SBOther}, @@ -19222,7 +19482,7 @@ static const struct unidata st111[] = { {cd1840,0,0,0,0,0,Lo,0,GBOther,WBKatakana,SBOLetter}, {cd1841,0,0,0,0,0,Lo,0,GBOther,WBKatakana,SBOLetter} }; -static const struct unidata st112[] = { +static const struct unidata st114[] = { {cd1842,0,0,0,0,0,Lo,0,GBOther,WBKatakana,SBOLetter}, {cd1843,0,0,0,0,0,Lo,0,GBOther,WBKatakana,SBOLetter}, {cd1844,0,0,0,0,0,Lo,0,GBOther,WBKatakana,SBOLetter}, @@ -19352,7 +19612,7 @@ static const struct unidata st112[] = { {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther} }; -static const struct unidata st113[] = { +static const struct unidata st115[] = { {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, @@ -19482,7 +19742,7 @@ static const struct unidata st113[] = { {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther} }; -static const struct unidata st114[] = { +static const 
struct unidata st116[] = { {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, @@ -19612,7 +19872,7 @@ static const struct unidata st114[] = { {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther} }; -static const struct unidata st115[] = { +static const struct unidata st117[] = { {0,0,0,0,0,0,Po,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,Po,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther}, @@ -19742,7 +20002,7 @@ static const struct unidata st115[] = { {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther} }; -static const struct unidata st116[] = { +static const struct unidata st118[] = { {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther}, @@ -19872,7 +20132,7 @@ static const struct unidata st116[] = { {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther} }; -static const struct unidata st117[] = { +static const struct unidata st119[] = { {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, @@ -20002,7 +20262,7 @@ static const struct unidata st117[] = { {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther} }; -static const struct unidata st118[] = { +static const struct unidata st120[] = { {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, @@ -20132,7 +20392,7 @@ static const struct unidata st118[] = { {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther} }; -static const struct unidata st119[] = { +static const struct unidata st121[] = { {0,0,cf919,0,40,0,Lu,0,GBOther,WBALetter,SBUpper}, 
{0,0,cf920,0,40,0,Lu,0,GBOther,WBALetter,SBUpper}, {0,0,cf921,0,40,0,Lu,0,GBOther,WBALetter,SBUpper}, @@ -20262,7 +20522,7 @@ static const struct unidata st119[] = { {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter} }; -static const struct unidata st120[] = { +static const struct unidata st122[] = { {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, @@ -20392,7 +20652,7 @@ static const struct unidata st120[] = { {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther} }; -static const struct unidata st121[] = { +static const struct unidata st123[] = { {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, @@ -20522,7 +20782,7 @@ static const struct unidata st121[] = { {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther} }; -static const struct unidata st122[] = { +static const struct unidata st124[] = { {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, @@ -20652,7 +20912,7 @@ static const struct unidata st122[] = { {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther} }; -static const struct unidata st123[] = { +static const struct unidata st125[] = { {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {0,0,0,0,0,0,Mn,0,GBExtend,WBExtend,SBExtend}, {0,0,0,0,0,0,Mn,0,GBExtend,WBExtend,SBExtend}, @@ -20782,7 +21042,7 @@ static const struct unidata st123[] = { {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther} }; -static const struct unidata st124[] = { +static const struct unidata st126[] = { {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, {0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, 
{0,0,0,0,0,0,Lo,0,GBOther,WBALetter,SBOLetter}, @@ -20912,7 +21172,7 @@ static const struct unidata st124[] = { {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther} }; -static const struct unidata st125[] = { +static const struct unidata st127[] = { {0,0,0,0,0,0,Nl,0,GBOther,WBALetter,SBOLetter}, {0,0,0,0,0,0,Nl,0,GBOther,WBALetter,SBOLetter}, {0,0,0,0,0,0,Nl,0,GBOther,WBALetter,SBOLetter}, @@ -21042,7 +21302,7 @@ static const struct unidata st125[] = { {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther} }; -static const struct unidata st126[] = { +static const struct unidata st128[] = { {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther}, @@ -21172,7 +21432,7 @@ static const struct unidata st126[] = { {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther} }; -static const struct unidata st127[] = { +static const struct unidata st129[] = { {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther}, @@ -21302,7 +21562,7 @@ static const struct unidata st127[] = { {0,0,0,0,0,220,Mn,0,GBExtend,WBExtend,SBExtend}, {0,0,0,0,0,220,Mn,0,GBExtend,WBExtend,SBExtend} }; -static const struct unidata st128[] = { +static const struct unidata st130[] = { {0,0,0,0,0,220,Mn,0,GBExtend,WBExtend,SBExtend}, {0,0,0,0,0,220,Mn,0,GBExtend,WBExtend,SBExtend}, {0,0,0,0,0,220,Mn,0,GBExtend,WBExtend,SBExtend}, @@ -21432,7 +21692,7 @@ static const struct unidata st128[] = { {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther} }; -static const struct unidata st129[] = { +static const struct unidata st131[] = { {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther}, @@ -21562,7 +21822,7 @@ static const struct unidata st129[] = { 
{0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther} }; -static const struct unidata st130[] = { +static const struct unidata st132[] = { {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,So,0,GBOther,WBOther,SBOther}, @@ -21692,7 +21952,7 @@ static const struct unidata st130[] = { {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther} }; -static const struct unidata st131[] = { +static const struct unidata st133[] = { {cd491,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper}, {cd493,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper}, {cd1086,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper}, @@ -21822,7 +22082,7 @@ static const struct unidata st131[] = { {cd511,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper}, {cd1131,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper} }; -static const struct unidata st132[] = { +static const struct unidata st134[] = { {cd1271,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper}, {cd1098,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper}, {cd2,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower}, @@ -21952,7 +22212,7 @@ static const struct unidata st132[] = { {cd530,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower}, {cd533,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower} }; -static const struct unidata st133[] = { +static const struct unidata st135[] = { {cd291,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower}, {cd301,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower}, {cd292,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower}, @@ -22082,7 +22342,7 @@ static const struct unidata st133[] = { {cd1270,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper}, {cd509,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper} }; -static const struct unidata st134[] = { +static const struct unidata st136[] = { {cd510,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper}, {cd1126,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper}, {cd511,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper}, @@ -22212,7 +22472,7 @@ static const struct unidata st134[] = { 
{cd1272,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower}, {cd287,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower} }; -static const struct unidata st135[] = { +static const struct unidata st137[] = { {cd180,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower}, {cd529,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower}, {cd530,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower}, @@ -22342,7 +22602,7 @@ static const struct unidata st135[] = { {cd505,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper}, {cd507,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper} }; -static const struct unidata st136[] = { +static const struct unidata st138[] = { {cd1094,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper}, {cd508,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper}, {cd1270,0,0,0,0,0,Lu,0,GBOther,WBALetter,SBUpper}, @@ -22472,7 +22732,7 @@ static const struct unidata st136[] = { {cd535,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower}, {cd536,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower} }; -static const struct unidata st137[] = { +static const struct unidata st139[] = { {cd343,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower}, {cd3118,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower}, {cd3119,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower}, @@ -22602,7 +22862,7 @@ static const struct unidata st137[] = { {cd3123,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower}, {cd338,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower} }; -static const struct unidata st138[] = { +static const struct unidata st140[] = { {cd340,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower}, {cd341,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower}, {cd3124,0,0,0,0,0,Ll,0,GBOther,WBALetter,SBLower}, @@ -22732,7 +22992,7 @@ static const struct unidata st138[] = { {cd1075,0,0,0,0,0,Nd,0,GBOther,WBNumeric,SBNumeric}, {cd1076,0,0,0,0,0,Nd,0,GBOther,WBNumeric,SBNumeric} }; -static const struct unidata st139[] = { +static const struct unidata st141[] = { {cd3138,cd3138,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, {cd3139,cd3139,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, {cd3140,cd3140,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, @@ -22862,7 +23122,7 @@ static const 
struct unidata st139[] = { {cd3243,cd3243,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, {cd3244,cd3244,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter} }; -static const struct unidata st140[] = { +static const struct unidata st142[] = { {cd3245,cd3245,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, {cd3246,cd3246,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, {cd3247,cd3247,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, @@ -22992,7 +23252,7 @@ static const struct unidata st140[] = { {cd3354,cd3354,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, {cd3355,cd3355,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter} }; -static const struct unidata st141[] = { +static const struct unidata st143[] = { {cd3356,cd3356,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, {cd2419,cd2419,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, {cd2318,cd2318,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, @@ -23122,7 +23382,7 @@ static const struct unidata st141[] = { {cd3463,cd3463,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, {cd3464,cd3464,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter} }; -static const struct unidata st142[] = { +static const struct unidata st144[] = { {cd3465,cd3465,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, {cd3466,cd3466,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, {cd3467,cd3467,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, @@ -23252,7 +23512,7 @@ static const struct unidata st142[] = { {cd2518,cd2518,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, {cd2518,cd2518,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter} }; -static const struct unidata st143[] = { +static const struct unidata st145[] = { {cd3581,cd3581,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, {cd3582,cd3582,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, {cd3583,cd3583,0,0,0,0,Lo,0,GBOther,WBOther,SBOLetter}, @@ -23382,7 +23642,7 @@ static const struct unidata st143[] = { {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther}, {0,0,0,0,0,0,Cn,0,GBOther,WBOther,SBOther} }; -const struct unidata*const unidata[]={ +const struct unidata *const unidata[]={ st0, st1, st2, @@ -23815,73 +24075,71 @@ st16, st16, st16, st16, -st16, -st16, -st16, 
-st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, -st16, st99, +st99, +st99, +st99, +st99, +st99, +st99, +st99, +st99, +st99, +st99, +st99, +st99, +st99, +st99, +st99, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, +st100, st100, st101, st102, @@ -23899,23 +24157,25 @@ st113, st114, st115, st116, -st16, -st16, st117, st118, +st16, +st16, st119, st120, +st121, +st122, st16, st16, st16, st16, st16, st16, -st121, +st123, st16, -st122, +st124, st16, -st123, +st125, st16, st16, st16, @@ -23965,9 +24225,9 @@ st40, st40, st40, st40, -st124, +st126, st16, -st125, +st127, st16, st16, st16, @@ -24312,21 +24572,21 @@ st16, st16, st16, st67, -st126, -st127, st128, st129, -st16, st130, -st16, st131, +st16, st132, +st16, st133, st134, st135, st136, st137, st138, +st139, +st140, st16, st16, st16, @@ -24903,9 +25163,9 @@ st16, st16, st16, st16, -st139, -st140, st141, st142, st143, +st144, +st145, }; diff --git a/scripts/make-unidata b/scripts/make-unidata index 46f9724..b78d289 100755 --- a/scripts/make-unidata +++ b/scripts/make-unidata @@ -82,6 +82,7 @@ sub input { chmod(0444, $lpath) or die "$lpath: $!\n"; } open(STDIN, "<$lpath") or die "$lpath: $!\n"; + print STDERR "Reading $lpath...\n"; } @@ -94,8 +95,12 @@ while(<>) { # TODO justify this exclusion! 
my $name = $f[1]; my $gc = $f[2]; # General_Category + # Various GCs we don't expect to see in UnicodeData.txt $cats{$gc} = 1; # always record all GCs next if $name =~ /(first|last)>/i; # ignore placeholders + die "unexpected Cn" if $gc eq 'Cn'; + die "unexpected Co" if $gc eq 'Co'; + die "unexpected Cs" if $gc eq 'Cs'; my $ccc = $f[3]; # Canonical_Combining_Class my $dm = $f[5]; # Decomposition_Type + Decomposition_Mapping my $sum = hex($f[12]) || $c; # Simple_Uppercase_Mapping @@ -193,18 +198,54 @@ for my $c (keys %data) { # Round up the maximum value to a whole number of subtables $max += ($modulus - 1) - ($max % $modulus); -# Make sure there are no gaps +# Surrogates +my $Cs = { + "gc" => "Cs", # UTF-16 surrogate + "ccc" => 0, + "ud" => 0, + "ld" => 0 +}; +for(my $c = 0xD800; $c <= 0xDFFF; ++$c) { + $data{$c} = $Cs; +} + +# Private use characters +# We only fill in values below $max, utf32__unidata() handles the rest +my $Co = { + "gc" => "Co", + "ccc" => 0, + "ud" => 0, + "ld" => 0 +}; +for(my $c = 0xE000; $c <= 0xF8FF && $c <= $max; ++$c) { + $data{$c} = $Co; +} +for(my $c = 0xF0000; $c <= 0xFFFFD && $c <= $max; ++$c) { + $data{$c} = $Co; +} +for(my $c = 0x100000; $c <= 0x10FFFD && $c <= $max; ++$c) { + $data{$c} = $Co; +} + +# Anything left is not assigned +my $Cn = { + "gc" => "Cn", # not assigned + "ccc" => 0, + "ud" => 0, + "ld" => 0 +}; for(my $c = 0; $c <= $max; ++$c) { if(!exists $data{$c}) { - $data{$c} = { - "gc" => "Cn", # not assigned - "ccc" => 0, - "ud" => 0, - "ld" => 0, - "wbreak" => 'Other', - "gbreak" => 'Other', - "sbreak" => 'Other', - }; + $data{$c} = $Cn; + } + if(!exists $data{$c}->{wbreak}) { + $data{$c}->{wbreak} = 'Other'; + } + if(!exists $data{$c}->{gbreak}) { + $data{$c}->{gbreak} = 'Other'; + } + if(!exists $data{$c}->{sbreak}) { + $data{$c}->{sbreak} = 'Other'; } } $cats{'Cn'} = 1; @@ -247,6 +288,7 @@ while(<>) { } # Generate the header file +print STDERR "Generating unidata.h...\n"; open(STDOUT, ">unidata.h") or die "unidata.h: $!\n"; out("/*
Automatically generated file, see scripts/make-unidata */\n", @@ -324,6 +366,7 @@ out("#endif\n"); close STDOUT or die "unidata.h: $!\n"; +print STDERR "Generating unidata.c...\n"; open(STDOUT, ">unidata.c") or die "unidata.c: $!\n"; out("/* Automatically generated file, see scripts/make-unidata */\n", @@ -367,7 +410,7 @@ for(my $c = 0; $c <= $max; ++$c) { # If canon is set then compat will be too and will be identical. # If compat is set the canon might be clear. So we use the # compat version and fix up the symbols after. - if(exists $data{$c}->{compat}) { + if(exists $data{$c} && exists $data{$c}->{compat}) { my $s = join(",", (map(hex($_), split(/\s+/, $data{$c}->{compat})), 0)); if(!exists $decompnums{$s}) { @@ -393,7 +436,7 @@ my %cfnums = (); my $cfsaved = 0; out("static const uint32_t "); for(my $c = 0; $c <= $max; ++$c) { - if(exists $data{$c}->{casefold}) { + if(exists $data{$c} && exists $data{$c}->{casefold}) { my $s = join(",", (map(hex($_), split(/\s+/, $data{$c}->{casefold})), 0)); if(!exists $cfnums{$s}) { @@ -457,7 +500,7 @@ for(my $base = 0; $base <= $max; $base += $modulus) { $subtableno{$base} = $subtable{$t}; } -out("const struct unidata*const unidata[]={\n"); +out("const struct unidata *const unidata[]={\n"); for(my $base = 0; $base <= $max; $base += $modulus) { #out("st$subtableno{$base} /* ".sprintf("%04x", $base)." */,\n"); out("st$subtableno{$base},\n"); @@ -466,5 +509,6 @@ out("};\n"); close STDOUT or die "unidata.c: $!\n"; -print STDERR "max=$max, subtables=$subtablecounter, subtablessaved=$subtablessaved\n"; +printf STDERR "max=%04X\n", $max; +print STDERR "subtables=$subtablecounter, subtablessaved=$subtablessaved\n"; print STDERR "decompsaved=$decompsaved cfsaved=$cfsaved\n"; -- [mdw]