| 1 | /* Automatically generated file, see scripts/make-unidata */ |
#ifndef UNIDATA_H
#define UNIDATA_H
#include <stdint.h>
/*
 * General category codes.
 *
 * The enumerator suffixes look like the two-letter short aliases of the
 * Unicode General_Category property (UAX #44) -- confirm against
 * scripts/make-unidata.  Values run consecutively from 0 to 29 in the order
 * listed; they are written out so the numbering is visible at a glance.
 * NOTE(review): this file is generated, so lasting changes belong in the
 * generator rather than here.
 */
enum unicode_General_Category {
  unicode_General_Category_Cc = 0,
  unicode_General_Category_Cf = 1,
  unicode_General_Category_Cn = 2,
  unicode_General_Category_Co = 3,
  unicode_General_Category_Cs = 4,

  unicode_General_Category_Ll = 5,
  unicode_General_Category_Lm = 6,
  unicode_General_Category_Lo = 7,
  unicode_General_Category_Lt = 8,
  unicode_General_Category_Lu = 9,

  unicode_General_Category_Mc = 10,
  unicode_General_Category_Me = 11,
  unicode_General_Category_Mn = 12,

  unicode_General_Category_Nd = 13,
  unicode_General_Category_Nl = 14,
  unicode_General_Category_No = 15,

  unicode_General_Category_Pc = 16,
  unicode_General_Category_Pd = 17,
  unicode_General_Category_Pe = 18,
  unicode_General_Category_Pf = 19,
  unicode_General_Category_Pi = 20,
  unicode_General_Category_Po = 21,
  unicode_General_Category_Ps = 22,

  unicode_General_Category_Sc = 23,
  unicode_General_Category_Sk = 24,
  unicode_General_Category_Sm = 25,
  unicode_General_Category_So = 26,

  unicode_General_Category_Zl = 27,
  unicode_General_Category_Zp = 28,
  unicode_General_Category_Zs = 29
};
/*
 * Grapheme break classes.
 *
 * The enumerator suffixes look like values of the Unicode
 * Grapheme_Cluster_Break property (UAX #29) -- confirm against
 * scripts/make-unidata.  Values run consecutively from 0 to 9.
 */
enum unicode_Grapheme_Break {
  unicode_Grapheme_Break_CR = 0,
  unicode_Grapheme_Break_Control = 1,
  unicode_Grapheme_Break_Extend = 2,
  unicode_Grapheme_Break_L = 3,
  unicode_Grapheme_Break_LF = 4,
  unicode_Grapheme_Break_LV = 5,
  unicode_Grapheme_Break_LVT = 6,
  unicode_Grapheme_Break_Other = 7,
  unicode_Grapheme_Break_T = 8,
  unicode_Grapheme_Break_V = 9
};

/*
 * Table of strings defined in another translation unit.
 * NOTE(review): presumably indexed by enum unicode_Grapheme_Break value and
 * holding the printable class names -- confirm against the definition.
 */
extern const char *const unicode_Grapheme_Break_names[];
/*
 * Word break classes.
 *
 * The enumerator suffixes look like values of the Unicode Word_Break
 * property (UAX #29) -- confirm against scripts/make-unidata.  Values run
 * consecutively from 0 to 8.
 */
enum unicode_Word_Break {
  unicode_Word_Break_ALetter = 0,
  unicode_Word_Break_Extend = 1,
  unicode_Word_Break_ExtendNumLet = 2,
  unicode_Word_Break_Format = 3,
  unicode_Word_Break_Katakana = 4,
  unicode_Word_Break_MidLetter = 5,
  unicode_Word_Break_MidNum = 6,
  unicode_Word_Break_Numeric = 7,
  unicode_Word_Break_Other = 8
};

/*
 * Table of strings defined in another translation unit.
 * NOTE(review): presumably indexed by enum unicode_Word_Break value and
 * holding the printable class names -- confirm against the definition.
 */
extern const char *const unicode_Word_Break_names[];
/*
 * Sentence break classes.
 *
 * The enumerator suffixes look like values of the Unicode Sentence_Break
 * property (UAX #29) -- confirm against scripts/make-unidata.  Values run
 * consecutively from 0 to 11.
 */
enum unicode_Sentence_Break {
  unicode_Sentence_Break_ATerm = 0,
  unicode_Sentence_Break_Close = 1,
  unicode_Sentence_Break_Extend = 2,
  unicode_Sentence_Break_Format = 3,
  unicode_Sentence_Break_Lower = 4,
  unicode_Sentence_Break_Numeric = 5,
  unicode_Sentence_Break_OLetter = 6,
  unicode_Sentence_Break_Other = 7,
  unicode_Sentence_Break_STerm = 8,
  unicode_Sentence_Break_Sep = 9,
  unicode_Sentence_Break_Sp = 10,
  unicode_Sentence_Break_Upper = 11
};

/*
 * Table of strings defined in another translation unit.
 * NOTE(review): presumably indexed by enum unicode_Sentence_Break value and
 * holding the printable class names -- confirm against the definition.
 */
extern const char *const unicode_Sentence_Break_names[];
/*
 * Per-character flag bits.  Each enumerator is a distinct single bit, so the
 * two can be OR-ed together.
 * NOTE(review): presumably these are the bits stored in the `flags` member of
 * struct unidata -- confirm against the generated tables.
 */
enum unicode_flags {
  unicode_normalize_before_casefold = 1 << 0,
  unicode_compatibility_decomposition = 1 << 1
};
| 80 | |
/*
 * Per-character record.
 *
 * The fixed-width member types (uint32_t, uint8_t) come from <stdint.h>.
 * Member order and types must not be changed by hand: the generated tables
 * are presumably initialized positionally (TODO confirm against the
 * generated .c file).
 */
struct unidata {
  /* NOTE(review): presumably the decomposition of this character as a
   * sequence of code points; the terminator / NULL convention is not visible
   * from this header -- confirm against the generator. */
  const uint32_t *decomp;
  /* NOTE(review): presumably the case-folded form, same conventions as
   * decomp -- confirm. */
  const uint32_t *casefold;
  /* NOTE(review): presumably composition data for this character; exact
   * layout not visible here -- confirm. */
  const uint32_t *composed;
  /* NOTE(review): presumably the canonical combining class -- confirm. */
  unsigned char ccc;
  /* NOTE(review): presumably holds an enum unicode_General_Category value. */
  char general_category;
  /* NOTE(review): presumably a bitmask of enum unicode_flags values. */
  uint8_t flags;
  /* NOTE(review): presumably holds an enum unicode_Grapheme_Break value. */
  char grapheme_break;
  /* NOTE(review): presumably holds an enum unicode_Word_Break value. */
  char word_break;
  /* NOTE(review): presumably holds an enum unicode_Sentence_Break value. */
  char sentence_break;
};

/*
 * Array of pointers to constant records, defined in another translation unit.
 * NOTE(review): looks like the first level of a two-level lookup table (see
 * UNICODE_MODULUS) -- confirm how callers index it.
 */
extern const struct unidata *const unidata[];
/*
 * One row of the unicode_utf8_valid table: three single-byte fields.
 * NOTE(review): the names suggest `count` is a UTF-8 sequence length and
 * `min2`/`max2` bound the permitted second byte -- confirm against the code
 * that consumes unicode_utf8_valid.
 */
struct unicode_utf8_row {
  uint8_t count;
  uint8_t min2;
  uint8_t max2;
};

/* Table of rows defined in another translation unit; its length is not
 * visible from this header. */
extern const struct unicode_utf8_row unicode_utf8_valid[];
/*
 * Table-size constants.
 *
 * UNICODE_NCHARS is 0x110000, one more than U+10FFFF.
 * NOTE(review): UNICODE_MODULUS looks like the number of characters per
 * second-level row of unidata[], and UNICODE_BREAK_START / _END / _TOP look
 * like code point boundaries used to skip a range when indexing that table
 * -- confirm against scripts/make-unidata and the lookup code.
 */
#define UNICODE_NCHARS 0x110000     /* 1114112 */
#define UNICODE_MODULUS 16
#define UNICODE_BREAK_START 0x30000 /* 196608 */
#define UNICODE_BREAK_END 0xE0000   /* 917504 */
#define UNICODE_BREAK_TOP 0xE0200   /* 918016 */
| 102 | #endif |