Commit | Line | Data |
---|---|---|
132a5a4a RK |
/** @file lib/unidata.h
 * @brief Unicode tables
 *
 * Automatically generated file, see scripts/make-unidata
 *
 * DO NOT EDIT.
 */
61507e3c RK |
8 | #ifndef UNIDATA_H |
9 | #define UNIDATA_H | |
14523635 RK |
/** @brief Values of the Unicode General_Category property
 *
 * Each enumerator is named after the two-letter General_Category alias.
 * The enumerators are listed in alphabetical order of that alias and carry
 * no explicit initializers, so their values run consecutively from 0
 * (@c unicode_General_Category_Cc) to 29 (@c unicode_General_Category_Zs).
 *
 * The per-value comments give the corresponding long property value name
 * from the Unicode Character Database (UAX #44).
 */
enum unicode_General_Category {
  unicode_General_Category_Cc,          /* Control */
  unicode_General_Category_Cf,          /* Format */
  unicode_General_Category_Cn,          /* Unassigned */
  unicode_General_Category_Co,          /* Private_Use */
  unicode_General_Category_Cs,          /* Surrogate */
  unicode_General_Category_Ll,          /* Lowercase_Letter */
  unicode_General_Category_Lm,          /* Modifier_Letter */
  unicode_General_Category_Lo,          /* Other_Letter */
  unicode_General_Category_Lt,          /* Titlecase_Letter */
  unicode_General_Category_Lu,          /* Uppercase_Letter */
  unicode_General_Category_Mc,          /* Spacing_Mark */
  unicode_General_Category_Me,          /* Enclosing_Mark */
  unicode_General_Category_Mn,          /* Nonspacing_Mark */
  unicode_General_Category_Nd,          /* Decimal_Number */
  unicode_General_Category_Nl,          /* Letter_Number */
  unicode_General_Category_No,          /* Other_Number */
  unicode_General_Category_Pc,          /* Connector_Punctuation */
  unicode_General_Category_Pd,          /* Dash_Punctuation */
  unicode_General_Category_Pe,          /* Close_Punctuation */
  unicode_General_Category_Pf,          /* Final_Punctuation */
  unicode_General_Category_Pi,          /* Initial_Punctuation */
  unicode_General_Category_Po,          /* Other_Punctuation */
  unicode_General_Category_Ps,          /* Open_Punctuation */
  unicode_General_Category_Sc,          /* Currency_Symbol */
  unicode_General_Category_Sk,          /* Modifier_Symbol */
  unicode_General_Category_Sm,          /* Math_Symbol */
  unicode_General_Category_So,          /* Other_Symbol */
  unicode_General_Category_Zl,          /* Line_Separator */
  unicode_General_Category_Zp,          /* Paragraph_Separator */
  unicode_General_Category_Zs           /* Space_Separator */
};
349b7b74 RK |
/** @brief Values of the Grapheme_Break property
 *
 * Enumerators are named after the property value and listed in ASCII order
 * of that name.  No explicit initializers are used, so the values run
 * consecutively from 0 (@c unicode_Grapheme_Break_CR) to 11
 * (@c unicode_Grapheme_Break_V).
 */
enum unicode_Grapheme_Break {
  unicode_Grapheme_Break_CR,
  unicode_Grapheme_Break_Control,
  unicode_Grapheme_Break_Extend,
  unicode_Grapheme_Break_L,
  unicode_Grapheme_Break_LF,
  unicode_Grapheme_Break_LV,
  unicode_Grapheme_Break_LVT,
  unicode_Grapheme_Break_Other,
  unicode_Grapheme_Break_Prepend,
  unicode_Grapheme_Break_SpacingMark,
  unicode_Grapheme_Break_T,
  unicode_Grapheme_Break_V
};

/** @brief Table of Grapheme_Break value names (defined elsewhere)
 *
 * NOTE(review): presumably indexed by @c enum @c unicode_Grapheme_Break
 * value; the definition is not visible here, so confirm before relying on it.
 */
extern const char *const unicode_Grapheme_Break_names[];
0b7052da RK |
/** @brief Values of the Word_Break property
 *
 * Enumerators are named after the property value and listed in ASCII order
 * of that name.  No explicit initializers are used, so the values run
 * consecutively from 0 (@c unicode_Word_Break_ALetter) to 12
 * (@c unicode_Word_Break_Other).
 */
enum unicode_Word_Break {
  unicode_Word_Break_ALetter,
  unicode_Word_Break_CR,
  unicode_Word_Break_Extend,
  unicode_Word_Break_ExtendNumLet,
  unicode_Word_Break_Format,
  unicode_Word_Break_Katakana,
  unicode_Word_Break_LF,
  unicode_Word_Break_MidLetter,
  unicode_Word_Break_MidNum,
  unicode_Word_Break_MidNumLet,
  unicode_Word_Break_Newline,
  unicode_Word_Break_Numeric,
  unicode_Word_Break_Other
};

/** @brief Table of Word_Break value names (defined elsewhere)
 *
 * NOTE(review): presumably indexed by @c enum @c unicode_Word_Break value;
 * the definition is not visible here, so confirm before relying on it.
 */
extern const char *const unicode_Word_Break_names[];
349b7b74 RK |
/** @brief Values of the Sentence_Break property
 *
 * Enumerators are named after the property value and listed in ASCII order
 * of that name.  No explicit initializers are used, so the values run
 * consecutively from 0 (@c unicode_Sentence_Break_ATerm) to 14
 * (@c unicode_Sentence_Break_Upper).
 */
enum unicode_Sentence_Break {
  unicode_Sentence_Break_ATerm,
  unicode_Sentence_Break_CR,
  unicode_Sentence_Break_Close,
  unicode_Sentence_Break_Extend,
  unicode_Sentence_Break_Format,
  unicode_Sentence_Break_LF,
  unicode_Sentence_Break_Lower,
  unicode_Sentence_Break_Numeric,
  unicode_Sentence_Break_OLetter,
  unicode_Sentence_Break_Other,
  unicode_Sentence_Break_SContinue,
  unicode_Sentence_Break_STerm,
  unicode_Sentence_Break_Sep,
  unicode_Sentence_Break_Sp,
  unicode_Sentence_Break_Upper
};

/** @brief Table of Sentence_Break value names (defined elsewhere)
 *
 * NOTE(review): presumably indexed by @c enum @c unicode_Sentence_Break
 * value; the definition is not visible here, so confirm before relying on it.
 */
extern const char *const unicode_Sentence_Break_names[];
/** @brief Per-character flag bits
 *
 * The two values are distinct single bits (1 and 2), so they can be
 * combined with bitwise OR.
 *
 * NOTE(review): presumably these are the bits stored in the @c flags member
 * of @c struct @c unidata; confirm against scripts/make-unidata.
 */
enum unicode_flags {
  unicode_normalize_before_casefold = 1,
  unicode_compatibility_decomposition = 2
};
95 | ||
/** @brief One record of the generated Unicode tables
 *
 * The fixed-width integer types used here come from <stdint.h>.  No
 * \#include is visible in this header, so the including file presumably has
 * to provide that header first.
 *
 * NOTE(review): the member descriptions below are read off the member names
 * and the enums declared alongside this struct; confirm them against
 * scripts/make-unidata before relying on them.
 */
struct unidata {
  /** Presumably the decomposition data for the character */
  const uint32_t *decomp;
  /** Presumably the case-folded form of the character */
  const uint32_t *casefold;
  /** Presumably data used when composing characters */
  const uint32_t *composed;
  /** Presumably the canonical combining class */
  unsigned char ccc;
  /** Presumably an @c enum @c unicode_General_Category value */
  char general_category;
  /** Presumably a bitwise OR of @c enum @c unicode_flags values */
  uint8_t flags;
  /** Presumably an @c enum @c unicode_Grapheme_Break value */
  char grapheme_break;
  /** Presumably an @c enum @c unicode_Word_Break value */
  char word_break;
  /** Presumably an @c enum @c unicode_Sentence_Break value */
  char sentence_break;
};

/** @brief Table of pointers to @c struct @c unidata records (defined elsewhere)
 *
 * NOTE(review): the indexing scheme is not visible here.  Presumably each
 * entry points at a run of UNICODE_MODULUS consecutive records; confirm
 * against the code that reads this table.
 */
extern const struct unidata *const unidata[];
18cda350 RK |
/** @brief Table of UTF-8 validity rows (defined elsewhere)
 *
 * Each row holds three 8-bit values.  <stdint.h> must already have been
 * included for @c uint8_t; no \#include is visible in this header.
 *
 * NOTE(review): presumably the table is indexed by the first byte of a
 * UTF-8 sequence, @c count is the length of the sequence, and @c min2 /
 * @c max2 bound the permitted second byte.  None of this is shown here;
 * confirm against the code that reads the table.
 */
extern const struct unicode_utf8_row {
  uint8_t count;
  uint8_t min2, max2;
} unicode_utf8_valid[];
/* Size and layout constants for the generated tables.
 *
 * NOTE(review): the meanings given below are inferred from the macro names;
 * only the numeric values are established by this header.  Confirm against
 * scripts/make-unidata.
 */

/** @brief 0x110000; presumably the number of Unicode code points */
#define UNICODE_NCHARS 1114112
/** @brief Presumably the number of records per entry of the unidata table */
#define UNICODE_MODULUS 16
/** @brief 0x30000; presumably the start of a range omitted from the table */
#define UNICODE_BREAK_START 196608
/** @brief 0xE0000; presumably the end of the omitted range */
#define UNICODE_BREAK_END 917504
/** @brief 0xE0200; presumably the limit of the tabulated code points */
#define UNICODE_BREAK_TOP 918016
61507e3c | 117 | #endif |