Commit | Line | Data |
---|---|---|
e5a5a138 | 1 | /* Automatically generated file, see scripts/make-unidata */ |
61507e3c RK |
2 | #ifndef UNIDATA_H |
3 | #define UNIDATA_H | |
14523635 RK |
/* Values of the Unicode General_Category property.
 *
 * Each enumerator is named after the two-letter short alias of a property
 * value.  They are listed in alphabetical order of that alias and carry no
 * explicit initializers, so they number consecutively from 0 (Cc) to 29 (Zs).
 * Do not reorder them: the numeric values are baked into generated tables.
 *
 * NOTE(review): presumably this is what struct unidata's general_category
 * field holds -- confirm against the generator script. */
enum unicode_General_Category {
  unicode_General_Category_Cc,          /* Control */
  unicode_General_Category_Cf,          /* Format */
  unicode_General_Category_Cn,          /* Unassigned */
  unicode_General_Category_Co,          /* Private_Use */
  unicode_General_Category_Cs,          /* Surrogate */
  unicode_General_Category_Ll,          /* Lowercase_Letter */
  unicode_General_Category_Lm,          /* Modifier_Letter */
  unicode_General_Category_Lo,          /* Other_Letter */
  unicode_General_Category_Lt,          /* Titlecase_Letter */
  unicode_General_Category_Lu,          /* Uppercase_Letter */
  unicode_General_Category_Mc,          /* Spacing_Mark */
  unicode_General_Category_Me,          /* Enclosing_Mark */
  unicode_General_Category_Mn,          /* Nonspacing_Mark */
  unicode_General_Category_Nd,          /* Decimal_Number */
  unicode_General_Category_Nl,          /* Letter_Number */
  unicode_General_Category_No,          /* Other_Number */
  unicode_General_Category_Pc,          /* Connector_Punctuation */
  unicode_General_Category_Pd,          /* Dash_Punctuation */
  unicode_General_Category_Pe,          /* Close_Punctuation */
  unicode_General_Category_Pf,          /* Final_Punctuation */
  unicode_General_Category_Pi,          /* Initial_Punctuation */
  unicode_General_Category_Po,          /* Other_Punctuation */
  unicode_General_Category_Ps,          /* Open_Punctuation */
  unicode_General_Category_Sc,          /* Currency_Symbol */
  unicode_General_Category_Sk,          /* Modifier_Symbol */
  unicode_General_Category_Sm,          /* Math_Symbol */
  unicode_General_Category_So,          /* Other_Symbol */
  unicode_General_Category_Zl,          /* Line_Separator */
  unicode_General_Category_Zp,          /* Paragraph_Separator */
  unicode_General_Category_Zs           /* Space_Separator */
};
349b7b74 RK |
/* Values of the Unicode Grapheme_Cluster_Break property (see UAX #29).
 *
 * Enumerators carry no explicit initializers, so they number consecutively
 * from 0 (CR) to 9 (V) in the order written.  Do not reorder them: the
 * numeric values are baked into generated tables. */
enum unicode_Grapheme_Break {
  unicode_Grapheme_Break_CR,            /* carriage return */
  unicode_Grapheme_Break_Control,
  unicode_Grapheme_Break_Extend,
  unicode_Grapheme_Break_L,             /* Hangul leading consonant */
  unicode_Grapheme_Break_LF,            /* line feed */
  unicode_Grapheme_Break_LV,            /* Hangul LV syllable */
  unicode_Grapheme_Break_LVT,           /* Hangul LVT syllable */
  unicode_Grapheme_Break_Other,
  unicode_Grapheme_Break_T,             /* Hangul trailing consonant */
  unicode_Grapheme_Break_V              /* Hangul vowel */
};

/* Table of read-only strings associated with the enum above; defined in
 * another translation unit.
 * NOTE(review): presumably indexed by enum unicode_Grapheme_Break value and
 * holding the property value names -- confirm against the generator. */
extern const char *const unicode_Grapheme_Break_names[];
0b7052da RK |
/* Values of the Unicode Word_Break property (see UAX #29).
 *
 * Enumerators carry no explicit initializers, so they number consecutively
 * from 0 (ALetter) to 8 (Other) in the order written.  Do not reorder them:
 * the numeric values are baked into generated tables. */
enum unicode_Word_Break {
  unicode_Word_Break_ALetter,
  unicode_Word_Break_Extend,
  unicode_Word_Break_ExtendNumLet,
  unicode_Word_Break_Format,
  unicode_Word_Break_Katakana,
  unicode_Word_Break_MidLetter,
  unicode_Word_Break_MidNum,
  unicode_Word_Break_Numeric,
  unicode_Word_Break_Other
};

/* Table of read-only strings associated with the enum above; defined in
 * another translation unit.
 * NOTE(review): presumably indexed by enum unicode_Word_Break value and
 * holding the property value names -- confirm against the generator. */
extern const char *const unicode_Word_Break_names[];
349b7b74 RK |
/* Values of the Unicode Sentence_Break property (see UAX #29).
 *
 * Enumerators carry no explicit initializers, so they number consecutively
 * from 0 (ATerm) to 11 (Upper) in the order written.  Do not reorder them:
 * the numeric values are baked into generated tables. */
enum unicode_Sentence_Break {
  unicode_Sentence_Break_ATerm,
  unicode_Sentence_Break_Close,
  unicode_Sentence_Break_Extend,
  unicode_Sentence_Break_Format,
  unicode_Sentence_Break_Lower,
  unicode_Sentence_Break_Numeric,
  unicode_Sentence_Break_OLetter,
  unicode_Sentence_Break_Other,
  unicode_Sentence_Break_STerm,
  unicode_Sentence_Break_Sep,
  unicode_Sentence_Break_Sp,
  unicode_Sentence_Break_Upper
};

/* Table of read-only strings associated with the enum above; defined in
 * another translation unit.
 * NOTE(review): presumably indexed by enum unicode_Sentence_Break value and
 * holding the property value names -- confirm against the generator. */
extern const char *const unicode_Sentence_Break_names[];
/* Per-character flag bits.
 *
 * The two values are distinct powers of two, so they can be combined with
 * bitwise OR and tested with bitwise AND.
 *
 * NOTE(review): presumably these are the bits stored in struct unidata's
 * flags field; the meanings below are inferred from the names only --
 * confirm against the generator script. */
enum unicode_flags {
  /* presumably: the character needs normalizing before case-folding */
  unicode_normalize_before_casefold = 1,
  /* presumably: the character's decomposition is a compatibility (not
   * canonical) decomposition */
  unicode_compatibility_decomposition = 2
};
80 | ||
/* Properties recorded for a single Unicode character.
 *
 * This declaration uses uint32_t and uint8_t, so <stdint.h> (or an
 * equivalent) must already have been included by the time this point is
 * reached.
 *
 * The enum-valued properties are stored in plain char fields rather than in
 * enum-typed fields, which keeps each one to a single byte.
 *
 * NOTE(review): the per-field meanings marked "presumably" are inferred from
 * the field names; confirm against the generator script. */
struct unidata {
  const uint32_t *decomp;       /* presumably: decomposition, or null if none */
  const uint32_t *casefold;     /* presumably: case-folded form, or null */
  const uint32_t *composed;     /* presumably: composition data, or null */
  unsigned char ccc;            /* presumably: canonical combining class */
  char general_category;        /* presumably: enum unicode_General_Category */
  uint8_t flags;                /* presumably: OR of enum unicode_flags bits */
  char grapheme_break;          /* presumably: enum unicode_Grapheme_Break */
  char word_break;              /* presumably: enum unicode_Word_Break */
  char sentence_break;          /* presumably: enum unicode_Sentence_Break */
};

/* Table of pointers to read-only struct unidata objects; defined in another
 * translation unit.
 * NOTE(review): presumably a two-level table in which each entry points at a
 * row of UNICODE_MODULUS consecutive characters -- confirm before indexing. */
extern const struct unidata *const unidata[];
18cda350 RK |
/* One row of the UTF-8 validity table: three single-byte fields.
 *
 * Uses uint8_t, so <stdint.h> (or an equivalent) must already have been
 * included.
 *
 * NOTE(review): the field meanings below are inferred from the names only;
 * confirm against the generator script and the UTF-8 decoder that reads
 * this table. */
struct unicode_utf8_row {
  uint8_t count;                /* presumably: sequence length for a lead byte */
  uint8_t min2;                 /* presumably: lowest valid second byte */
  uint8_t max2;                 /* presumably: highest valid second byte */
};

/* The table itself; defined in another translation unit.
 * NOTE(review): presumably indexed by the first byte of a UTF-8 sequence --
 * confirm before indexing. */
extern const struct unicode_utf8_row unicode_utf8_valid[];
/* Size constants for the generated tables.  Values are kept in decimal
 * exactly as generated; the hex equivalents are given for readability. */

/* 0x110000: the number of code points in the range U+0000..U+10FFFF. */
#define UNICODE_NCHARS 1114112

/* NOTE(review): presumably the number of characters per row of the unidata
 * table -- confirm against the generator.  Divides UNICODE_NCHARS exactly. */
#define UNICODE_MODULUS 16

/* 0x30000, 0xE0000 and 0xE0200 respectively, in ascending order.
 * NOTE(review): presumably START..END is a run of code points omitted from
 * the table, with coverage resuming at END and stopping at TOP -- confirm
 * against the generator and the lookup code before relying on this. */
#define UNICODE_BREAK_START 196608
#define UNICODE_BREAK_END 917504
#define UNICODE_BREAK_TOP 918016
61507e3c | 102 | #endif |